[llvm] r345462 - [NFC][X86] Baseline tests for AMD BdVer2 (Piledriver) Scheduler model
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 27 13:36:11 PDT 2018
Author: lebedevri
Date: Sat Oct 27 13:36:11 2018
New Revision: 345462
URL: http://llvm.org/viewvc/llvm-project?rev=345462&view=rev
Log:
[NFC][X86] Baseline tests for AMD BdVer2 (Piledriver) Scheduler model
Adding the baseline tests in a preparatory NFC commit,
so that the actual commit shows the *diff*.
Yes, I'm aware that a few of these codegen-based sched tests
are testing the wrong instructions; I will fix that afterwards.
For https://reviews.llvm.org/D52779
Added:
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pr37790.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rank.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-3.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-4.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-5.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-3dnow.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-adx.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-aes.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-bmi1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-clflushopt.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-cmov.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-cmpxchg.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-f16c.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-fma.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-fma4.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-lea.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-lzcnt.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-movbe.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-pclmul.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-popcnt.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-prefetchw.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse3.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse42.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-sse4a.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-ssse3.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-tbm.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x86_32.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-xop.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/simple-test.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s
Modified:
llvm/trunk/test/CodeGen/X86/aes-schedule.ll
llvm/trunk/test/CodeGen/X86/avx-schedule.ll
llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll
llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
llvm/trunk/test/CodeGen/X86/cmov-schedule.ll
llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
llvm/trunk/test/CodeGen/X86/fma-schedule.ll
llvm/trunk/test/CodeGen/X86/fma4-schedule.ll
llvm/trunk/test/CodeGen/X86/lea32-schedule.ll
llvm/trunk/test/CodeGen/X86/lea64-schedule.ll
llvm/trunk/test/CodeGen/X86/lwp-schedule.ll
llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll
llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll
llvm/trunk/test/CodeGen/X86/sse-schedule.ll
llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll
llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
llvm/trunk/test/CodeGen/X86/tbm-schedule.ll
llvm/trunk/test/CodeGen/X86/x87-schedule.ll
llvm/trunk/test/CodeGen/X86/xop-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/bextr-read-after-ld.s
llvm/trunk/test/tools/llvm-mca/X86/cpus.s
llvm/trunk/test/tools/llvm-mca/X86/read-after-ld-1.s
llvm/trunk/test/tools/llvm-mca/X86/register-file-statistics.s
llvm/trunk/test/tools/llvm-mca/X86/scheduler-queue-usage.s
llvm/trunk/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s
llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s
llvm/trunk/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s
Modified: llvm/trunk/test/CodeGen/X86/aes-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/aes-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/aes-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/aes-schedule.ll Sat Oct 27 13:36:11 2018
@@ -14,6 +14,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@@ -92,6 +94,18 @@ define <2 x i64> @test_aesdec(<2 x i64>
; SKX-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesdec:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesdec:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesdec:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [3:1.00]
@@ -195,6 +209,18 @@ define <2 x i64> @test_aesdeclast(<2 x i
; SKX-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesdeclast:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesdeclast:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesdeclast:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [3:1.00]
@@ -298,6 +324,18 @@ define <2 x i64> @test_aesenc(<2 x i64>
; SKX-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesenc:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesenc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesenc:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [3:1.00]
@@ -401,6 +439,18 @@ define <2 x i64> @test_aesenclast(<2 x i
; SKX-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesenclast:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesenclast:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesenclast:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [3:1.00]
@@ -517,6 +567,20 @@ define <2 x i64> @test_aesimc(<2 x i64>
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aesimc:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00]
+; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aesimc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [12:2.00]
+; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [18:2.00]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aesimc:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [2:1.00]
@@ -637,6 +701,20 @@ define <2 x i64> @test_aeskeygenassist(<
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_aeskeygenassist:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67]
+; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_aeskeygenassist:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [8:3.67]
+; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [8:3.33]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_aeskeygenassist:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [2:1.00]
Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Sat Oct 27 13:36:11 2018
@@ -6,6 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -46,6 +47,12 @@ define <4 x double> @test_addpd(<4 x dou
; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -100,6 +107,12 @@ define <8 x float> @test_addps(<8 x floa
; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -154,6 +167,12 @@ define <4 x double> @test_addsubpd(<4 x
; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -209,6 +228,12 @@ define <8 x float> @test_addsubps(<8 x f
; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_addsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -270,6 +295,13 @@ define <4 x double> @test_andnotpd(<4 x
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andnotpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -339,6 +371,13 @@ define <8 x float> @test_andnotps(<8 x f
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andnotps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -408,6 +447,13 @@ define <4 x double> @test_andpd(<4 x dou
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -475,6 +521,13 @@ define <8 x float> @test_andps(<8 x floa
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -542,6 +595,13 @@ define <4 x double> @test_blendpd(<4 x d
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
+; BDVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
@@ -605,6 +665,13 @@ define <8 x float> @test_blendps(<8 x fl
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
+; BDVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
@@ -662,6 +729,12 @@ define <4 x double> @test_blendvpd(<4 x
; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendvpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
@@ -717,6 +790,12 @@ define <8 x float> @test_blendvps(<8 x f
; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blendvps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
@@ -766,6 +845,11 @@ define <8 x float> @test_broadcastf128(<
; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastf128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastf128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:2.00]
@@ -811,6 +895,11 @@ define <4 x double> @test_broadcastsd_ym
; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastsd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastsd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
@@ -857,6 +946,11 @@ define <4 x float> @test_broadcastss(flo
; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
@@ -903,6 +997,11 @@ define <8 x float> @test_broadcastss_ymm
; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_broadcastss_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_broadcastss_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
@@ -961,6 +1060,13 @@ define <4 x double> @test_cmppd(<4 x dou
; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmppd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
@@ -1027,6 +1133,13 @@ define <8 x float> @test_cmpps(<8 x floa
; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
@@ -1093,6 +1206,13 @@ define <4 x double> @test_cvtdq2pd(<4 x
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtdq2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00]
@@ -1158,6 +1278,13 @@ define <8 x float> @test_cvtdq2ps(<8 x i
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtdq2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00]
@@ -1221,6 +1348,13 @@ define <8 x i32> @test_cvtpd2dq(<4 x dou
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
@@ -1285,6 +1419,13 @@ define <8 x i32> @test_cvttpd2dq(<4 x do
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
@@ -1348,6 +1489,13 @@ define <8 x float> @test_cvtpd2ps(<4 x d
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpd2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00]
@@ -1411,6 +1559,13 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00]
@@ -1475,6 +1630,13 @@ define <8 x i32> @test_cvttps2dq(<8 x fl
; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00]
@@ -1532,6 +1694,12 @@ define <4 x double> @test_divpd(<4 x dou
; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_divpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00]
+; BDVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_divpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
@@ -1586,6 +1754,12 @@ define <8 x float> @test_divps(<8 x floa
; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_divps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_divps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
@@ -1640,6 +1814,12 @@ define <8 x float> @test_dpps(<8 x float
; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00]
@@ -1701,6 +1881,13 @@ define <4 x float> @test_extractf128(<8
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_extractf128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_extractf128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50]
@@ -1756,6 +1943,12 @@ define <4 x double> @test_haddpd(<4 x do
; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_haddpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -1811,6 +2004,12 @@ define <8 x float> @test_haddps(<8 x flo
; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_haddps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_haddps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -1866,6 +2065,12 @@ define <4 x double> @test_hsubpd(<4 x do
; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_hsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -1921,6 +2126,12 @@ define <8 x float> @test_hsubps(<8 x flo
; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_hsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -1982,6 +2193,13 @@ define <8 x float> @test_insertf128(<8 x
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_insertf128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
+; BDVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_insertf128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
@@ -2035,6 +2253,11 @@ define <32 x i8> @test_lddqu(i8* %a0) {
; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lddqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00]
@@ -2092,6 +2315,13 @@ define <2 x double> @test_maskmovpd(i8*
; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
+; BDVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
@@ -2155,6 +2385,13 @@ define <4 x double> @test_maskmovpd_ymm(
; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; BDVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
@@ -2218,6 +2455,13 @@ define <4 x float> @test_maskmovps(i8* %
; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
+; BDVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
@@ -2281,6 +2525,13 @@ define <8 x float> @test_maskmovps_ymm(i
; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; BDVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
@@ -2338,6 +2589,12 @@ define <4 x double> @test_maxpd(<4 x dou
; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maxpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
@@ -2393,6 +2650,12 @@ define <8 x float> @test_maxps(<8 x floa
; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maxps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maxps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
@@ -2448,6 +2711,12 @@ define <4 x double> @test_minpd(<4 x dou
; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_minpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_minpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
@@ -2503,6 +2772,12 @@ define <8 x float> @test_minps(<8 x floa
; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_minps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_minps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
@@ -2564,6 +2839,13 @@ define <4 x double> @test_movapd(<4 x do
; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movapd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movapd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00]
@@ -2626,6 +2908,13 @@ define <8 x float> @test_movaps(<8 x flo
; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movaps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movaps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00]
@@ -2688,6 +2977,13 @@ define <4 x double> @test_movddup(<4 x d
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movddup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
+; BDVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movddup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:2.00]
@@ -2745,6 +3041,12 @@ define i32 @test_movmskpd(<4 x double> %
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movmskpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
@@ -2797,6 +3099,12 @@ define i32 @test_movmskps(<8 x float> %a
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movmskps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
@@ -2861,6 +3169,14 @@ define void @test_movntdq(<4 x i64> %a0,
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2916,6 +3232,12 @@ define <4 x double> @test_movntpd(<4 x d
; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
@@ -2969,6 +3291,12 @@ define <8 x float> @test_movntps(<8 x fl
; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
@@ -3028,6 +3356,13 @@ define <8 x float> @test_movshdup(<8 x f
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movshdup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
+; BDVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:2.00]
@@ -3091,6 +3426,13 @@ define <8 x float> @test_movsldup(<8 x f
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movsldup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
+; BDVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:2.00]
@@ -3156,6 +3498,13 @@ define <4 x double> @test_movupd(<4 x do
; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movupd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movupd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00]
@@ -3220,6 +3569,13 @@ define <8 x float> @test_movups(<8 x flo
; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movups:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
+; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movups:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00]
@@ -3276,6 +3632,12 @@ define <4 x double> @test_mulpd(<4 x dou
; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_mulpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_mulpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00]
@@ -3330,6 +3692,12 @@ define <8 x float> @test_mulps(<8 x floa
; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_mulps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_mulps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
@@ -3390,6 +3758,13 @@ define <4 x double> @orpd(<4 x double> %
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: orpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: orpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -3457,6 +3832,13 @@ define <8 x float> @test_orps(<8 x float
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_orps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_orps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -3524,6 +3906,13 @@ define <4 x double> @test_perm2f128(<4 x
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_perm2f128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
+; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_perm2f128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
@@ -3587,6 +3976,13 @@ define <2 x double> @test_permilpd(<2 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
@@ -3650,6 +4046,13 @@ define <4 x double> @test_permilpd_ymm(<
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:2.00]
@@ -3713,6 +4116,13 @@ define <4 x float> @test_permilps(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
+; BDVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
@@ -3776,6 +4186,13 @@ define <8 x float> @test_permilps_ymm(<8
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; BDVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:2.00]
@@ -3833,6 +4250,12 @@ define <2 x double> @test_permilvarpd(<2
; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
@@ -3888,6 +4311,12 @@ define <4 x double> @test_permilvarpd_ym
; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
@@ -3943,6 +4372,12 @@ define <4 x float> @test_permilvarps(<4
; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
@@ -3998,6 +4433,12 @@ define <8 x float> @test_permilvarps_ymm
; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_permilvarps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_permilvarps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
@@ -4059,6 +4500,13 @@ define <8 x float> @test_rcpps(<8 x floa
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00]
@@ -4123,6 +4571,13 @@ define <4 x double> @test_roundpd(<4 x d
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_roundpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00]
@@ -4187,6 +4642,13 @@ define <8 x float> @test_roundps(<8 x fl
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_roundps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_roundps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00]
@@ -4251,6 +4713,13 @@ define <8 x float> @test_rsqrtps(<8 x fl
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rsqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
+; BDVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00]
@@ -4315,6 +4784,13 @@ define <4 x double> @test_shufpd(<4 x do
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shufpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
+; BDVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shufpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
@@ -4378,6 +4854,13 @@ define <8 x float> @test_shufps(<8 x flo
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shufps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
+; BDVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shufps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
@@ -4441,6 +4924,13 @@ define <4 x double> @test_sqrtpd(<4 x do
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sqrtpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00]
+; BDVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sqrtpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00]
@@ -4505,6 +4995,13 @@ define <8 x float> @test_sqrtps(<8 x flo
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00]
+; BDVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00]
@@ -4563,6 +5060,12 @@ define <4 x double> @test_subpd(<4 x dou
; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_subpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_subpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -4617,6 +5120,12 @@ define <8 x float> @test_subps(<8 x floa
; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_subps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_subps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
@@ -4689,6 +5198,15 @@ define i32 @test_testpd(<2 x double> %a0
; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
@@ -4775,6 +5293,16 @@ define i32 @test_testpd_ymm(<4 x double>
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testpd_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testpd_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
@@ -4856,6 +5384,15 @@ define i32 @test_testps(<4 x float> %a0,
; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
@@ -4942,6 +5479,16 @@ define i32 @test_testps_ymm(<8 x float>
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_testps_ymm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
+; BDVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_testps_ymm:
; BTVER2: # %bb.0:
; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
@@ -5011,6 +5558,13 @@ define <4 x double> @test_unpckhpd(<4 x
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpckhpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
@@ -5068,6 +5622,12 @@ define <8 x float> @test_unpckhps(<8 x f
; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpckhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
@@ -5128,6 +5688,13 @@ define <4 x double> @test_unpcklpd(<4 x
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpcklpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
@@ -5185,6 +5752,12 @@ define <8 x float> @test_unpcklps(<8 x f
; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_unpcklps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
@@ -5245,6 +5818,13 @@ define <4 x double> @test_xorpd(<4 x dou
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xorpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -5312,6 +5892,13 @@ define <8 x float> @test_xorps(<8 x floa
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xorps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xorps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
@@ -5367,6 +5954,11 @@ define void @test_zeroall() {
; SKX-NEXT: vzeroall # sched: [12:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_zeroall:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vzeroall # sched: [9:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_zeroall:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vzeroall # sched: [90:36.50]
@@ -5412,6 +6004,11 @@ define void @test_zeroupper() {
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_zeroupper:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_zeroupper:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vzeroupper # sched: [46:18.50]
@@ -5486,6 +6083,16 @@ define void @test_avx256_zero_idioms() {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_avx256_zero_idioms:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
+; BDVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
+; BDVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_avx256_zero_idioms:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
Modified: llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll Sat Oct 27 13:36:11 2018
@@ -2,6 +2,7 @@
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX512
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx,+fast-partial-ymm-or-zmm-write | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=FAST-ymm-zmm
+; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BDVER2
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BTVER2
declare i32 @foo()
@@ -56,6 +57,20 @@ define <8 x float> @test01(<4 x float> %
; FAST-ymm-zmm-NEXT: addq $56, %rsp
; FAST-ymm-zmm-NEXT: retq
;
+; BDVER2-LABEL: test01:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: subq $56, %rsp
+; BDVER2-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
+; BDVER2-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: vmovaps %xmm0, {{.*}}(%rip)
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: vmovaps %xmm0, {{.*}}(%rip)
+; BDVER2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
+; BDVER2-NEXT: addq $56, %rsp
+; BDVER2-NEXT: retq
+;
; BTVER2-LABEL: test01:
; BTVER2: # %bb.0:
; BTVER2-NEXT: subq $56, %rsp
@@ -86,11 +101,24 @@ define <4 x float> @test02(<8 x float> %
; VZ-NEXT: vzeroupper
; VZ-NEXT: jmp do_sse # TAILCALL
;
-; NO-VZ-LABEL: test02:
-; NO-VZ: # %bb.0:
-; NO-VZ-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; NO-VZ-NEXT: jmp do_sse # TAILCALL
+; FAST-ymm-zmm-LABEL: test02:
+; FAST-ymm-zmm: # %bb.0:
+; FAST-ymm-zmm-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; FAST-ymm-zmm-NEXT: jmp do_sse # TAILCALL
+;
+; BDVER2-LABEL: test02:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: jmp do_sse # TAILCALL
+;
+; BTVER2-LABEL: test02:
+; BTVER2: # %bb.0:
+; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BTVER2-NEXT: jmp do_sse # TAILCALL
%add.i = fadd <8 x float> %a, %b
%add.low = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %add.i, i8 0)
%call3 = tail call <4 x float> @do_sse(<4 x float> %add.low) nounwind
@@ -162,6 +190,37 @@ define <4 x float> @test03(<4 x float> %
; FAST-ymm-zmm-NEXT: popq %rbx
; FAST-ymm-zmm-NEXT: retq
;
+; BDVER2-LABEL: test03:
+; BDVER2: # %bb.0: # %entry
+; BDVER2-NEXT: pushq %rbx
+; BDVER2-NEXT: subq $16, %rsp
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; BDVER2-NEXT: .p2align 4, 0x90
+; BDVER2-NEXT: .LBB3_1: # %while.cond
+; BDVER2-NEXT: # =>This Inner Loop Header: Depth=1
+; BDVER2-NEXT: callq foo
+; BDVER2-NEXT: testl %eax, %eax
+; BDVER2-NEXT: jne .LBB3_1
+; BDVER2-NEXT: # %bb.2: # %for.body.preheader
+; BDVER2-NEXT: movl $4, %ebx
+; BDVER2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; BDVER2-NEXT: .p2align 4, 0x90
+; BDVER2-NEXT: .LBB3_3: # %for.body
+; BDVER2-NEXT: # =>This Inner Loop Header: Depth=1
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: vmovaps {{.*}}(%rip), %ymm0
+; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: callq do_sse
+; BDVER2-NEXT: addl $-1, %ebx
+; BDVER2-NEXT: jne .LBB3_3
+; BDVER2-NEXT: # %bb.4: # %for.end
+; BDVER2-NEXT: addq $16, %rsp
+; BDVER2-NEXT: popq %rbx
+; BDVER2-NEXT: retq
+;
; BTVER2-LABEL: test03:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: pushq %rbx
@@ -230,15 +289,36 @@ define <4 x float> @test04(<4 x float> %
; VZ-NEXT: vzeroupper
; VZ-NEXT: retq
;
-; NO-VZ-LABEL: test04:
-; NO-VZ: # %bb.0:
-; NO-VZ-NEXT: pushq %rax
-; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; NO-VZ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; NO-VZ-NEXT: callq do_avx
-; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; NO-VZ-NEXT: popq %rax
-; NO-VZ-NEXT: retq
+; FAST-ymm-zmm-LABEL: test04:
+; FAST-ymm-zmm: # %bb.0:
+; FAST-ymm-zmm-NEXT: pushq %rax
+; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; FAST-ymm-zmm-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; FAST-ymm-zmm-NEXT: callq do_avx
+; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; FAST-ymm-zmm-NEXT: popq %rax
+; FAST-ymm-zmm-NEXT: retq
+;
+; BDVER2-LABEL: test04:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pushq %rax
+; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; BDVER2-NEXT: callq do_avx
+; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BDVER2-NEXT: popq %rax
+; BDVER2-NEXT: vzeroupper
+; BDVER2-NEXT: retq
+;
+; BTVER2-LABEL: test04:
+; BTVER2: # %bb.0:
+; BTVER2-NEXT: pushq %rax
+; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; BTVER2-NEXT: callq do_avx
+; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; BTVER2-NEXT: popq %rax
+; BTVER2-NEXT: retq
%shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%call = call <8 x float> @do_avx(<8 x float> %shuf) nounwind
%shuf2 = shufflevector <8 x float> %call, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Modified: llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-schedule.ll Sat Oct 27 13:36:11 2018
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -36,6 +37,13 @@ define i32 @test_andn_i32(i32 %a0, i32 %
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andn_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andn_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [4:1.00]
@@ -86,6 +94,13 @@ define i64 @test_andn_i64(i64 %a0, i64 %
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_andn_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_andn_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [4:1.00]
@@ -136,6 +151,13 @@ define i32 @test_bextr_i32(i32 %a0, i32
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bextr_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:1.00]
+; BDVER2-NEXT: bextrl %edi, %esi, %eax # sched: [2:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bextr_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [4:1.00]
@@ -186,6 +208,13 @@ define i64 @test_bextr_i64(i64 %a0, i64
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bextr_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:1.00]
+; BDVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bextr_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [4:1.00]
@@ -236,6 +265,13 @@ define i32 @test_blsi_i32(i32 %a0, i32 *
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsi_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
+; BDVER2-NEXT: blsil %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsi_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsil (%rsi), %ecx # sched: [5:1.00]
@@ -287,6 +323,13 @@ define i64 @test_blsi_i64(i64 %a0, i64 *
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsi_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
+; BDVER2-NEXT: blsiq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsi_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsiq (%rsi), %rcx # sched: [5:1.00]
@@ -338,6 +381,13 @@ define i32 @test_blsmsk_i32(i32 %a0, i32
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsmsk_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
+; BDVER2-NEXT: blsmskl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsmsk_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsmskl (%rsi), %ecx # sched: [5:1.00]
@@ -389,6 +439,13 @@ define i64 @test_blsmsk_i64(i64 %a0, i64
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsmsk_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
+; BDVER2-NEXT: blsmskq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsmsk_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsmskq (%rsi), %rcx # sched: [5:1.00]
@@ -440,6 +497,13 @@ define i32 @test_blsr_i32(i32 %a0, i32 *
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsr_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
+; BDVER2-NEXT: blsrl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsr_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsrl (%rsi), %ecx # sched: [5:1.00]
@@ -491,6 +555,13 @@ define i64 @test_blsr_i64(i64 %a0, i64 *
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_blsr_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
+; BDVER2-NEXT: blsrq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_blsr_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsrq (%rsi), %rcx # sched: [5:1.00]
@@ -546,6 +617,14 @@ define i16 @test_cttz_i16(i16 zeroext %a
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cttz_i16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: tzcntw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cttz_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntw (%rsi), %cx # sched: [5:1.00]
@@ -598,6 +677,13 @@ define i32 @test_cttz_i32(i32 %a0, i32 *
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cttz_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cttz_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntl (%rsi), %ecx # sched: [5:1.00]
@@ -648,6 +734,13 @@ define i64 @test_cttz_i64(i64 %a0, i64 *
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cttz_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cttz_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntq (%rsi), %rcx # sched: [5:1.00]
Modified: llvm/trunk/test/CodeGen/X86/cmov-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmov-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmov-schedule.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -540,6 +541,72 @@ define void @test_cmov_16(i16 %a0, i16 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmov_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmovow %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnow %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00]
+; BDVER2-NEXT: cmovsw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnsw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: cmovow (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnow (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
+; BDVER2-NEXT: cmovsw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmov_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1204,6 +1271,72 @@ define void @test_cmov_32(i32 %a0, i32 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmov_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmovol %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnol %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: cmovsl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnsl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: cmovol (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
+; BDVER2-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmov_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1868,6 +2001,72 @@ define void @test_cmov_64(i64 %a0, i64 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmov_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmov_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Sat Oct 27 13:36:11 2018
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -43,6 +44,13 @@ define <4 x float> @test_vcvtph2ps_128(<
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtph2ps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
+; BDVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtph2ps_128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
@@ -100,6 +108,13 @@ define <8 x float> @test_vcvtph2ps_256(<
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtph2ps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
+; BDVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtph2ps_256:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
@@ -152,6 +167,12 @@ define <8 x i16> @test_vcvtps2ph_128(<4
; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtps2ph_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtps2ph_128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
@@ -207,6 +228,13 @@ define <8 x i16> @test_vcvtps2ph_256(<8
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_vcvtps2ph_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_vcvtps2ph_256:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]
Modified: llvm/trunk/test/CodeGen/X86/fma-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-schedule.ll Sat Oct 27 13:36:11 2018
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
@@ -24,6 +25,18 @@ define void @test_vfmaddpd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -113,6 +126,19 @@ define void @test_vfmaddpd_256(<4 x doub
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -206,6 +232,18 @@ define void @test_vfmaddps_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -295,6 +333,19 @@ define void @test_vfmaddps_256(<8 x floa
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -388,6 +439,18 @@ define void @test_vfmaddsd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -476,6 +539,18 @@ define void @test_vfmaddss_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -568,6 +643,18 @@ define void @test_vfmaddsubpd_128(<2 x d
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -657,6 +744,19 @@ define void @test_vfmaddsubpd_256(<4 x d
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -750,6 +850,18 @@ define void @test_vfmaddsubps_128(<4 x f
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -839,6 +951,19 @@ define void @test_vfmaddsubps_256(<8 x f
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmaddsubps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
+; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmaddsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -936,6 +1061,18 @@ define void @test_vfmsubaddpd_128(<2 x d
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1025,6 +1162,19 @@ define void @test_vfmsubaddpd_256(<4 x d
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1118,6 +1268,18 @@ define void @test_vfmsubaddps_128(<4 x f
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1207,6 +1369,19 @@ define void @test_vfmsubaddps_256(<8 x f
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubaddps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1304,6 +1479,18 @@ define void @test_vfmsubpd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1393,6 +1580,19 @@ define void @test_vfmsubpd_256(<4 x doub
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1486,6 +1686,18 @@ define void @test_vfmsubps_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1575,6 +1787,19 @@ define void @test_vfmsubps_256(<8 x floa
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1668,6 +1893,18 @@ define void @test_vfmsubsd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1756,6 +1993,18 @@ define void @test_vfmsubss_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfmsubss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1848,6 +2097,18 @@ define void @test_vfnmaddpd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -1937,6 +2198,19 @@ define void @test_vfnmaddpd_256(<4 x dou
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2030,6 +2304,18 @@ define void @test_vfnmaddps_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2119,6 +2405,19 @@ define void @test_vfnmaddps_256(<8 x flo
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2212,6 +2511,18 @@ define void @test_vfnmaddsd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2300,6 +2611,18 @@ define void @test_vfnmaddss_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmaddss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2392,6 +2715,18 @@ define void @test_vfnmsubpd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubpd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2481,6 +2816,19 @@ define void @test_vfnmsubpd_256(<4 x dou
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubpd_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2574,6 +2922,18 @@ define void @test_vfnmsubps_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubps_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2663,6 +3023,19 @@ define void @test_vfnmsubps_256(<8 x flo
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubps_256:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2756,6 +3129,18 @@ define void @test_vfnmsubsd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubsd_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@@ -2844,6 +3229,18 @@ define void @test_vfnmsubss_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_vfnmsubss_128:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
+; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
+; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; HASWELL-LABEL: test_vfnmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
Modified: llvm/trunk/test/CodeGen/X86/fma4-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma4-schedule.ll Sat Oct 27 13:36:11 2018
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER4
;
; VFMADD
@@ -19,14 +19,23 @@ define void @test_vfmaddpd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -42,15 +51,25 @@ define void @test_vfmaddpd_256(<4 x doub
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
@@ -65,14 +84,23 @@ define void @test_vfmaddps_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -88,15 +116,25 @@ define void @test_vfmaddps_256(<8 x floa
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
@@ -111,14 +149,23 @@ define void @test_vfmaddsd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsd $2, $1, $0, $0 \0A\09 vfmaddsd $3, $1, $0, $0 \0A\09 vfmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -133,14 +180,23 @@ define void @test_vfmaddss_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddss $2, $1, $0, $0 \0A\09 vfmaddss $3, $1, $0, $0 \0A\09 vfmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -159,14 +215,23 @@ define void @test_vfmaddsubpd_128(<2 x d
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -182,15 +247,25 @@ define void @test_vfmaddsubpd_256(<4 x d
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
@@ -205,14 +280,23 @@ define void @test_vfmaddsubps_128(<4 x f
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -228,15 +312,25 @@ define void @test_vfmaddsubps_256(<8 x f
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmaddsubps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmaddsubps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmaddsubps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
@@ -255,14 +349,23 @@ define void @test_vfmsubaddpd_128(<2 x d
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -278,15 +381,25 @@ define void @test_vfmsubaddpd_256(<4 x d
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
@@ -301,14 +414,23 @@ define void @test_vfmsubaddps_128(<4 x f
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -324,15 +446,25 @@ define void @test_vfmsubaddps_256(<8 x f
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubaddps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubaddps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubaddps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
@@ -351,14 +483,23 @@ define void @test_vfmsubpd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -374,15 +515,25 @@ define void @test_vfmsubpd_256(<4 x doub
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
@@ -397,14 +548,23 @@ define void @test_vfmsubps_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -420,15 +580,25 @@ define void @test_vfmsubps_256(<8 x floa
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
@@ -443,14 +613,23 @@ define void @test_vfmsubsd_128(<2 x doub
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubsd $2, $1, $0, $0 \0A\09 vfmsubsd $3, $1, $0, $0 \0A\09 vfmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -465,14 +644,23 @@ define void @test_vfmsubss_128(<4 x floa
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfmsubss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfmsubss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfmsubss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfmsubss $2, $1, $0, $0 \0A\09 vfmsubss $3, $1, $0, $0 \0A\09 vfmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -491,14 +679,23 @@ define void @test_vfnmaddpd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -514,15 +711,25 @@ define void @test_vfnmaddpd_256(<4 x dou
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
@@ -537,14 +744,23 @@ define void @test_vfnmaddps_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -560,15 +776,25 @@ define void @test_vfnmaddps_256(<8 x flo
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
@@ -583,14 +809,23 @@ define void @test_vfnmaddsd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddsd $2, $1, $0, $0 \0A\09 vfnmaddsd $3, $1, $0, $0 \0A\09 vfnmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -605,14 +840,23 @@ define void @test_vfnmaddss_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmaddss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmaddss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmaddss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmaddss $2, $1, $0, $0 \0A\09 vfnmaddss $3, $1, $0, $0 \0A\09 vfnmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -631,14 +875,23 @@ define void @test_vfnmsubpd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubpd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubpd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubpd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -654,15 +907,25 @@ define void @test_vfnmsubpd_256(<4 x dou
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubpd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubpd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubpd_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
ret void
}
@@ -677,14 +940,23 @@ define void @test_vfnmsubps_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubps_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
@@ -700,15 +972,25 @@ define void @test_vfnmsubps_256(<8 x flo
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubps_256:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER34-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: vzeroupper
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
ret void
}
@@ -723,14 +1005,23 @@ define void @test_vfnmsubsd_128(<2 x dou
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubsd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubsd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubsd_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubsd $2, $1, $0, $0 \0A\09 vfnmsubsd $3, $1, $0, $0 \0A\09 vfnmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
ret void
}
@@ -745,14 +1036,23 @@ define void @test_vfnmsubss_128(<4 x flo
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfnmsubss_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfnmsubss_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER12-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER34-LABEL: test_vfnmsubss_128:
+; BDVER34: # %bb.0:
+; BDVER34-NEXT: #APP
+; BDVER34-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER34-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0
+; BDVER34-NEXT: #NO_APP
+; BDVER34-NEXT: retq
tail call void asm "vfnmsubss $2, $1, $0, $0 \0A\09 vfnmsubss $3, $1, $0, $0 \0A\09 vfnmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
ret void
}
Modified: llvm/trunk/test/CodeGen/X86/lea32-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea32-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea32-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea32-schedule.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -60,6 +61,12 @@ define i32 @test_lea_offset(i32) {
; SKYLAKE-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@@ -124,6 +131,12 @@ define i32 @test_lea_offset_big(i32) {
; SKYLAKE-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@@ -196,6 +209,13 @@ define i32 @test_lea_add(i32, i32) {
; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@@ -274,6 +294,13 @@ define i32 @test_lea_add_offset(i32, i32
; SKYLAKE-NEXT: addl $16, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@@ -358,6 +385,13 @@ define i32 @test_lea_add_offset_big(i32,
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@@ -425,6 +459,12 @@ define i32 @test_lea_mul(i32) {
; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@@ -494,6 +534,12 @@ define i32 @test_lea_mul_offset(i32) {
; SKYLAKE-NEXT: addl $-32, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@@ -569,6 +615,12 @@ define i32 @test_lea_mul_offset_big(i32)
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@@ -641,6 +693,13 @@ define i32 @test_lea_add_scale(i32, i32)
; SKYLAKE-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@@ -720,6 +779,13 @@ define i32 @test_lea_add_scale_offset(i3
; SKYLAKE-NEXT: addl $96, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@@ -805,6 +871,13 @@ define i32 @test_lea_add_scale_offset_bi
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
+; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
+; BDVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
Modified: llvm/trunk/test/CodeGen/X86/lea64-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea64-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea64-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea64-schedule.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -53,6 +54,11 @@ define i64 @test_lea_offset(i64) {
; SKYLAKE-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
@@ -108,6 +114,11 @@ define i64 @test_lea_offset_big(i64) {
; SKYLAKE-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
@@ -164,6 +175,11 @@ define i64 @test_lea_add(i64, i64) {
; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
@@ -224,6 +240,11 @@ define i64 @test_lea_add_offset(i64, i64
; SKYLAKE-NEXT: addq $16, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [2:1.00]
@@ -290,6 +311,11 @@ define i64 @test_lea_add_offset_big(i64,
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [2:1.00]
@@ -346,6 +372,11 @@ define i64 @test_lea_mul(i64) {
; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
@@ -406,6 +437,11 @@ define i64 @test_lea_mul_offset(i64) {
; SKYLAKE-NEXT: addq $-32, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [2:1.00]
@@ -472,6 +508,11 @@ define i64 @test_lea_mul_offset_big(i64)
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_mul_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [2:1.00]
@@ -528,6 +569,11 @@ define i64 @test_lea_add_scale(i64, i64)
; SKYLAKE-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [2:1.00]
@@ -589,6 +635,11 @@ define i64 @test_lea_add_scale_offset(i6
; SKYLAKE-NEXT: addq $96, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [2:1.00]
@@ -656,6 +707,11 @@ define i64 @test_lea_add_scale_offset_bi
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lea_add_scale_offset_big:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [2:1.00]
Modified: llvm/trunk/test/CodeGen/X86/lwp-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lwp-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lwp-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lwp-schedule.ll Sat Oct 27 13:36:11 2018
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
@@ -11,10 +11,20 @@ define void @test_llwpcb(i8 *%a0) nounwi
; GENERIC-NEXT: llwpcb %rdi # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_llwpcb:
-; BDVER: # %bb.0:
-; BDVER-NEXT: llwpcb %rdi
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_llwpcb:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: llwpcb %rdi # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_llwpcb:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: llwpcb %rdi
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_llwpcb:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: llwpcb %rdi
+; BDVER4-NEXT: retq
tail call void @llvm.x86.llwpcb(i8 *%a0)
ret void
}
@@ -25,10 +35,20 @@ define i8* @test_slwpcb(i8 *%a0) nounwin
; GENERIC-NEXT: slwpcb %rax # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_slwpcb:
-; BDVER: # %bb.0:
-; BDVER-NEXT: slwpcb %rax
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_slwpcb:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: slwpcb %rax # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_slwpcb:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: slwpcb %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_slwpcb:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: slwpcb %rax
+; BDVER4-NEXT: retq
%1 = tail call i8* @llvm.x86.slwpcb()
ret i8 *%1
}
@@ -42,12 +62,27 @@ define i8 @test_lwpins32_rri(i32 %a0, i3
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins32_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: addl %esi, %esi
-; BDVER-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins32_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33]
+; BDVER12-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins32_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: addl %esi, %esi
+; BDVER3-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins32_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: addl %esi, %esi
+; BDVER4-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%1 = add i32 %a1, %a1
%2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %1, i32 2309737967)
ret i8 %2
@@ -61,11 +96,24 @@ define i8 @test_lwpins32_rmi(i32 %a0, i3
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins32_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins32_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins32_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins32_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
%1 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 1985229328)
ret i8 %1
@@ -79,11 +127,24 @@ define i8 @test_lwpins64_rri(i64 %a0, i3
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins64_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins64_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins64_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins64_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2309737967)
ret i8 %1
}
@@ -96,11 +157,24 @@ define i8 @test_lwpins64_rmi(i64 %a0, i3
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpins64_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
-; BDVER-NEXT: setb %al
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpins64_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: setb %al # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpins64_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
+; BDVER3-NEXT: setb %al
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpins64_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
+; BDVER4-NEXT: setb %al
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
%1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 1985229328)
ret i8 %1
@@ -114,11 +188,24 @@ define void @test_lwpval32_rri(i32 %a0,
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval32_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: addl %esi, %esi
-; BDVER-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval32_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33]
+; BDVER12-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval32_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: addl %esi, %esi
+; BDVER3-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval32_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: addl %esi, %esi
+; BDVER4-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
+; BDVER4-NEXT: retq
%1 = add i32 %a1, %a1
tail call void @llvm.x86.lwpval32(i32 %a0, i32 %1, i32 4275878552)
ret void
@@ -131,10 +218,21 @@ define void @test_lwpval32_rmi(i32 %a0,
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval32_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval32_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval32_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval32_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 305419896)
ret void
@@ -147,10 +245,21 @@ define void @test_lwpval64_rri(i64 %a0,
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval64_rri:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval64_rri:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval64_rri:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval64_rri:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
+; BDVER4-NEXT: retq
tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 4275878552)
ret void
}
@@ -162,10 +271,21 @@ define void @test_lwpval64_rmi(i64 %a0,
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_lwpval64_rmi:
-; BDVER: # %bb.0:
-; BDVER-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_lwpval64_rmi:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
+; BDVER12-NEXT: # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_lwpval64_rmi:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_lwpval64_rmi:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
+; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 305419896)
ret void
Modified: llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll Sat Oct 27 13:36:11 2018
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -40,6 +41,14 @@ define i16 @test_ctlz_i16(i16 zeroext %a
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctlz_i16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: lzcntw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctlz_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntw (%rsi), %cx # sched: [4:1.00]
@@ -92,6 +101,13 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctlz_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctlz_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntl (%rsi), %ecx # sched: [4:1.00]
@@ -142,6 +158,13 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctlz_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctlz_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntq (%rsi), %rcx # sched: [4:1.00]
Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -76,6 +77,14 @@ define i64 @test_cvtpd2pi(<2 x double> %
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpd2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
+; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpd2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
@@ -157,6 +166,13 @@ define <2 x double> @test_cvtpi2pd(x86_m
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpi2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpi2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00]
@@ -235,6 +251,13 @@ define <4 x float> @test_cvtpi2ps(x86_mm
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtpi2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtpi2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
@@ -321,6 +344,14 @@ define i64 @test_cvtps2pi(<4 x float> %a
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvtps2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
+; BDVER2-NEXT: por %mm0, %mm1 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm1, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvtps2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
@@ -410,6 +441,14 @@ define i64 @test_cvttpd2pi(<2 x double>
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttpd2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
+; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttpd2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
@@ -499,6 +538,14 @@ define i64 @test_cvttps2pi(<4 x float> %
; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cvttps2pi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
+; BDVER2-NEXT: por %mm0, %mm1 # sched: [1:0.33]
+; BDVER2-NEXT: movq %mm1, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cvttps2pi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
@@ -564,6 +611,11 @@ define void @test_emms() optsize {
; SKX-NEXT: emms # sched: [10:4.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_emms:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: emms # sched: [31:10.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_emms:
; BTVER2: # %bb.0:
; BTVER2-NEXT: emms # sched: [2:0.50]
@@ -619,6 +671,11 @@ define void @test_maskmovq(x86_mmx %a0,
; SKX-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_maskmovq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_maskmovq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:0.50]
@@ -722,6 +779,17 @@ define i32 @test_movd(x86_mmx %a0, i32 %
; SKX-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movd %edi, %mm1 # sched: [1:1.00]
+; BDVER2-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
+; BDVER2-NEXT: paddd %mm1, %mm2 # sched: [3:1.00]
+; BDVER2-NEXT: paddd %mm2, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: movd %mm2, %ecx # sched: [2:1.00]
+; BDVER2-NEXT: movd %mm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movd %edi, %mm1 # sched: [8:0.50]
@@ -815,6 +883,13 @@ define i64 @test_movdq2q(<2 x i64> %a0)
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movdq2q:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00]
+; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movdq2q:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50]
@@ -876,6 +951,11 @@ define void @test_movntq(x86_mmx* %a0, x
; SKX-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movntq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00]
@@ -949,6 +1029,13 @@ define void @test_movq(i64 *%a0) {
; SKX-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
+; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00]
@@ -1011,6 +1098,11 @@ define <2 x i64> @test_movq2dq(x86_mmx %
; SKX-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movq2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movq2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50]
@@ -1082,6 +1174,13 @@ define i64 @test_pabsb(x86_mmx *%a0) opt
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pabsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pabsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00]
@@ -1160,6 +1259,13 @@ define i64 @test_pabsd(x86_mmx *%a0) opt
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pabsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pabsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00]
@@ -1238,6 +1344,13 @@ define i64 @test_pabsw(x86_mmx *%a0) opt
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pabsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pabsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00]
@@ -1316,6 +1429,13 @@ define i64 @test_packssdw(x86_mmx %a0, x
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_packssdw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_packssdw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50]
@@ -1394,6 +1514,13 @@ define i64 @test_packsswb(x86_mmx %a0, x
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_packsswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_packsswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50]
@@ -1472,6 +1599,13 @@ define i64 @test_packuswb(x86_mmx %a0, x
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_packuswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_packuswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50]
@@ -1550,6 +1684,13 @@ define i64 @test_paddb(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
@@ -1628,6 +1769,13 @@ define i64 @test_paddd(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
@@ -1706,6 +1854,13 @@ define i64 @test_paddq(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
@@ -1784,6 +1939,13 @@ define i64 @test_paddsb(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50]
@@ -1862,6 +2024,13 @@ define i64 @test_paddsw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50]
@@ -1940,6 +2109,13 @@ define i64 @test_paddusb(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50]
@@ -2018,6 +2194,13 @@ define i64 @test_paddusw(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50]
@@ -2096,6 +2279,13 @@ define i64 @test_paddw(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_paddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: paddw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_paddw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
@@ -2174,6 +2364,13 @@ define i64 @test_palignr(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_palignr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_palignr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50]
@@ -2252,6 +2449,13 @@ define i64 @test_pand(x86_mmx %a0, x86_m
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pand:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pand:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50]
@@ -2330,6 +2534,13 @@ define i64 @test_pandn(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pandn:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pandn:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50]
@@ -2408,6 +2619,13 @@ define i64 @test_pavgb(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pavgb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pavgb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50]
@@ -2486,6 +2704,13 @@ define i64 @test_pavgw(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pavgw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pavgw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50]
@@ -2564,6 +2789,13 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpeqb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpeqb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50]
@@ -2642,6 +2874,13 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpeqd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpeqd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50]
@@ -2720,6 +2959,13 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpeqw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpeqw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50]
@@ -2798,6 +3044,13 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpgtb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpgtb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50]
@@ -2876,6 +3129,13 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpgtd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpgtd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50]
@@ -2954,6 +3214,13 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pcmpgtw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pcmpgtw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50]
@@ -3016,6 +3283,11 @@ define i32 @test_pextrw(x86_mmx %a0) opt
; SKX-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pextrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
@@ -3087,6 +3359,13 @@ define i64 @test_phaddd(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phaddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phaddd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50]
@@ -3165,6 +3444,13 @@ define i64 @test_phaddsw(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phaddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phaddsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50]
@@ -3243,6 +3529,13 @@ define i64 @test_phaddw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phaddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phaddw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50]
@@ -3321,6 +3614,13 @@ define i64 @test_phsubd(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phsubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phsubd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50]
@@ -3399,6 +3699,13 @@ define i64 @test_phsubsw(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phsubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phsubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50]
@@ -3477,6 +3784,13 @@ define i64 @test_phsubw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_phsubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50]
+; BDVER2-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_phsubw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50]
@@ -3563,6 +3877,14 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pinsrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
+; BDVER2-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
+; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pinsrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [7:0.50]
@@ -3644,6 +3966,13 @@ define i64 @test_pmaddwd(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaddwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaddwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00]
@@ -3722,6 +4051,13 @@ define i64 @test_pmaddubsw(x86_mmx %a0,
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaddubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaddubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00]
@@ -3800,6 +4136,13 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaxsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaxsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50]
@@ -3878,6 +4221,13 @@ define i64 @test_pmaxub(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmaxub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmaxub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50]
@@ -3956,6 +4306,13 @@ define i64 @test_pminsw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pminsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pminsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50]
@@ -4034,6 +4391,13 @@ define i64 @test_pminub(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pminub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pminub %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pminub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [1:0.50]
@@ -4096,6 +4460,11 @@ define i32 @test_pmovmskb(x86_mmx %a0) o
; SKX-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmovmskb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmovmskb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00]
@@ -4167,6 +4536,13 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmulhrsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmulhrsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00]
@@ -4245,6 +4621,13 @@ define i64 @test_pmulhw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmulhw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmulhw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00]
@@ -4323,6 +4706,13 @@ define i64 @test_pmulhuw(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmulhuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmulhuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00]
@@ -4401,6 +4791,13 @@ define i64 @test_pmullw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmullw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmullw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00]
@@ -4479,6 +4876,13 @@ define i64 @test_pmuludq(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pmuludq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pmuludq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00]
@@ -4557,6 +4961,13 @@ define i64 @test_por(x86_mmx %a0, x86_mm
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_por:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_por:
; BTVER2: # %bb.0:
; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50]
@@ -4635,6 +5046,13 @@ define i64 @test_psadbw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psadbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00]
+; BDVER2-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psadbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:0.50]
@@ -4713,6 +5131,13 @@ define i64 @test_pshufb(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pshufb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pshufb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: [2:0.50]
@@ -4791,6 +5216,13 @@ define i64 @test_pshufw(x86_mmx *%a0) op
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pshufw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
+; BDVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pshufw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
@@ -4869,6 +5301,13 @@ define i64 @test_psignb(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psignb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psignb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
@@ -4947,6 +5386,13 @@ define i64 @test_psignd(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psignd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psignd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
@@ -5025,6 +5471,13 @@ define i64 @test_psignw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psignw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
+; BDVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psignw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
@@ -5111,6 +5564,14 @@ define i64 @test_pslld(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pslld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: pslld $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pslld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50]
@@ -5201,6 +5662,14 @@ define i64 @test_psllq(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psllq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psllq $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psllq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50]
@@ -5291,6 +5760,14 @@ define i64 @test_psllw(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psllw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psllw $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psllw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psllw %mm1, %mm0 # sched: [1:0.50]
@@ -5381,6 +5858,14 @@ define i64 @test_psrad(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrad:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrad $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrad:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50]
@@ -5471,6 +5956,14 @@ define i64 @test_psraw(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psraw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psraw $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psraw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50]
@@ -5561,6 +6054,14 @@ define i64 @test_psrld(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrld $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50]
@@ -5651,6 +6152,14 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrlq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrlq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50]
@@ -5741,6 +6250,14 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psrlw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
+; BDVER2-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psrlw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50]
@@ -5823,6 +6340,13 @@ define i64 @test_psubb(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
@@ -5901,6 +6425,13 @@ define i64 @test_psubd(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubd %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
@@ -5979,6 +6510,13 @@ define i64 @test_psubq(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubq %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
@@ -6057,6 +6595,13 @@ define i64 @test_psubsb(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50]
@@ -6135,6 +6680,13 @@ define i64 @test_psubsw(x86_mmx %a0, x86
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50]
@@ -6213,6 +6765,13 @@ define i64 @test_psubusb(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50]
@@ -6291,6 +6850,13 @@ define i64 @test_psubusw(x86_mmx %a0, x8
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50]
@@ -6369,6 +6935,13 @@ define i64 @test_psubw(x86_mmx %a0, x86_
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_psubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: psubw %mm1, %mm0 # sched: [3:1.00]
+; BDVER2-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_psubw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
@@ -6447,6 +7020,13 @@ define i64 @test_punpckhbw(x86_mmx %a0,
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckhbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
+; BDVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckhbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
@@ -6525,6 +7105,13 @@ define i64 @test_punpckhdq(x86_mmx %a0,
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckhdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
+; BDVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckhdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
@@ -6603,6 +7190,13 @@ define i64 @test_punpckhwd(x86_mmx %a0,
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckhwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
+; BDVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckhwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
@@ -6681,6 +7275,13 @@ define i64 @test_punpcklbw(x86_mmx %a0,
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpcklbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
+; BDVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpcklbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
@@ -6759,6 +7360,13 @@ define i64 @test_punpckldq(x86_mmx %a0,
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpckldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
+; BDVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpckldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
@@ -6837,6 +7445,13 @@ define i64 @test_punpcklwd(x86_mmx %a0,
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_punpcklwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
+; BDVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_punpcklwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
@@ -6915,6 +7530,13 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pxor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
+; BDVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
+; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pxor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
Modified: llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -60,6 +61,14 @@ define i16 @test_ctpop_i16(i16 zeroext %
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctpop_i16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: popcntw (%rsi), %cx # sched: [9:1.00]
+; BDVER2-NEXT: popcntw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctpop_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntw (%rsi), %cx # sched: [4:1.00]
@@ -126,6 +135,13 @@ define i32 @test_ctpop_i32(i32 %a0, i32
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctpop_i32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00]
+; BDVER2-NEXT: popcntl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctpop_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntl (%rsi), %ecx # sched: [4:1.00]
@@ -190,6 +206,13 @@ define i64 @test_ctpop_i64(i64 %a0, i64
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ctpop_i64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00]
+; BDVER2-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ctpop_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntq (%rsi), %rcx # sched: [4:1.00]
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Sat Oct 27 13:36:11 2018
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
@@ -37,6 +38,12 @@ define float @f32_no_estimate(float %x)
; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -105,6 +112,13 @@ define float @f32_one_step(float %x) #1
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -202,6 +216,16 @@ define float @f32_two_step(float %x) #2
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -300,6 +324,12 @@ define <4 x float> @v4f32_no_estimate(<4
; FMA-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
+; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [5:1.00]
@@ -368,6 +398,13 @@ define <4 x float> @v4f32_one_step(<4 x
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
@@ -467,6 +504,16 @@ define <4 x float> @v4f32_two_step(<4 x
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
@@ -568,6 +615,12 @@ define <8 x float> @v8f32_no_estimate(<8
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -643,6 +696,13 @@ define <8 x float> @v8f32_one_step(<8 x
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -755,6 +815,16 @@ define <8 x float> @v8f32_two_step(<8 x
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -864,6 +934,13 @@ define <16 x float> @v16f32_no_estimate(
; FMA-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_no_estimate:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
+; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -966,6 +1043,17 @@ define <16 x float> @v16f32_one_step(<16
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_one_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -1136,6 +1224,21 @@ define <16 x float> @v16f32_two_step(<16
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_two_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Sat Oct 27 13:36:11 2018
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
@@ -30,6 +31,12 @@ define float @f32_no_step_2(float %x) #3
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_no_step_2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_no_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@@ -101,6 +108,14 @@ define float @f32_one_step_2(float %x) #
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_one_step_2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_one_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -196,6 +211,15 @@ define float @f32_one_step_2_divs(float
; FMA-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -309,6 +333,17 @@ define float @f32_two_step_2(float %x) #
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: f32_two_step_2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: f32_two_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -425,6 +460,14 @@ define <4 x float> @v4f32_one_step2(<4 x
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_one_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
@@ -522,6 +565,15 @@ define <4 x float> @v4f32_one_step_2_div
; FMA-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
@@ -637,6 +689,17 @@ define <4 x float> @v4f32_two_step2(<4 x
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v4f32_two_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v4f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
@@ -761,6 +824,14 @@ define <8 x float> @v8f32_one_step2(<8 x
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_one_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -867,6 +938,15 @@ define <8 x float> @v8f32_one_step_2_div
; FMA-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -996,6 +1076,17 @@ define <8 x float> @v8f32_two_step2(<8 x
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_two_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -1097,6 +1188,11 @@ define <8 x float> @v8f32_no_step(<8 x f
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_no_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_no_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
@@ -1151,6 +1247,12 @@ define <8 x float> @v8f32_no_step2(<8 x
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v8f32_no_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v8f32_no_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
@@ -1257,6 +1359,19 @@ define <16 x float> @v16f32_one_step2(<1
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_one_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -1415,6 +1530,21 @@ define <16 x float> @v16f32_one_step_2_d
; FMA-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_one_step_2_divs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -1613,6 +1743,23 @@ define <16 x float> @v16f32_two_step2(<1
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_two_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@@ -1755,6 +1902,12 @@ define <16 x float> @v16f32_no_step(<16
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_no_step:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_no_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
@@ -1821,6 +1974,14 @@ define <16 x float> @v16f32_no_step2(<16
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
+; BDVER2-LABEL: v16f32_no_step2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: v16f32_no_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00]
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll Sat Oct 27 13:36:11 2018
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER1
; uint64_t lshift10(uint64_t a, uint64_t b)
@@ -16,17 +17,17 @@ define i64 @lshift10_optsize(i64 %a, i64
; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift10_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift10_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift10_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: shldq $10, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, 10
%shr = lshr i64 %b, 54
@@ -41,19 +42,19 @@ define i64 @lshift10(i64 %a, i64 %b) nou
; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift10:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: shrq $54, %rsi # sched: [1:0.50]
+; BDVER12-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift10:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50]
; BTVER2-NEXT: shrq $54, %rsi # sched: [1:0.50]
; BTVER2-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift10:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: shlq $10, %rdi
-; BDVER1-NEXT: shrq $54, %rsi
-; BDVER1-NEXT: leaq (%rsi,%rdi), %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, 10
%shr = lshr i64 %b, 54
@@ -74,17 +75,17 @@ define i64 @rshift10_optsize(i64 %a, i64
; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift10_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift10_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift10_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: shrdq $62, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = lshr i64 %a, 62
%shr = shl i64 %b, 2
@@ -100,17 +101,17 @@ define i64 @rshift10(i64 %a, i64 %b) nou
; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift10:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: shrq $62, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift10:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shrq $62, %rdi # sched: [1:0.50]
; BTVER2-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift10:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: shrq $62, %rdi
-; BDVER1-NEXT: leaq (%rdi,%rsi,4), %rax
-; BDVER1-NEXT: retq
entry:
%shl = lshr i64 %a, 62
%shr = shl i64 %b, 2
@@ -132,6 +133,14 @@ define i64 @lshift_cl_optsize(i64 %a, i6
; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_cl_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@@ -139,14 +148,6 @@ define i64 @lshift_cl_optsize(i64 %a, i6
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_cl_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shldq %cl, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, %c
%sub = sub nsw i64 64, %c
@@ -164,6 +165,17 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_cl:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shlq %cl, %rdi # sched: [3:1.50]
+; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shrq %cl, %rax # sched: [3:1.50]
+; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@@ -174,17 +186,6 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_cl:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rsi, %rax
-; BDVER1-NEXT: shlq %cl, %rdi
-; BDVER1-NEXT: negl %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shrq %cl, %rax
-; BDVER1-NEXT: orq %rdi, %rax
-; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, %c
%sub = sub nsw i64 64, %c
@@ -208,6 +209,14 @@ define i64 @rshift_cl_optsize(i64 %a, i6
; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift_cl_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@@ -215,14 +224,6 @@ define i64 @rshift_cl_optsize(i64 %a, i6
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift_cl_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rdi, %rax
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shrdq %cl, %rsi, %rax
-; BDVER1-NEXT: retq
entry:
%shr = lshr i64 %a, %c
%sub = sub nsw i64 64, %c
@@ -240,6 +241,17 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: rshift_cl:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50]
+; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50]
+; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: rshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@@ -250,17 +262,6 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: rshift_cl:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rdx, %rcx
-; BDVER1-NEXT: movq %rsi, %rax
-; BDVER1-NEXT: shrq %cl, %rdi
-; BDVER1-NEXT: negl %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shlq %cl, %rax
-; BDVER1-NEXT: orq %rdi, %rax
-; BDVER1-NEXT: retq
entry:
%shr = lshr i64 %a, %c
%sub = sub nsw i64 64, %c
@@ -284,19 +285,19 @@ define void @lshift_mem_cl_optsize(i64 %
; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_cl_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_cl_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rsi, %rcx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shldq %cl, %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, %c
@@ -315,6 +316,18 @@ define void @lshift_mem_cl(i64 %a, i64 %
; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_cl:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33]
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50]
+; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
+; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
+; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50]
+; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
@@ -326,18 +339,6 @@ define void @lshift_mem_cl(i64 %a, i64 %
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_cl:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq %rsi, %rcx
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shlq %cl, %rax
-; BDVER1-NEXT: negl %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT: shrq %cl, %rdi
-; BDVER1-NEXT: orq %rax, %rdi
-; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, %c
@@ -354,6 +355,15 @@ define void @lshift_mem(i64 %a) nounwind
; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50]
+; BDVER12-NEXT: shrq $54, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33]
+; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
@@ -362,15 +372,6 @@ define void @lshift_mem(i64 %a) nounwind
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shlq $10, %rax
-; BDVER1-NEXT: shrq $54, %rdi
-; BDVER1-NEXT: orq %rax, %rdi
-; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, 10
@@ -386,15 +387,15 @@ define void @lshift_mem_optsize(i64 %a)
; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [9:11.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: shldq $10, %rdi, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, 10
@@ -412,6 +413,15 @@ define void @lshift_mem_b(i64 %b) nounwi
; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_b:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
+; BDVER12-NEXT: shrq $54, %rax # sched: [1:0.50]
+; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
+; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_b:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
@@ -420,15 +430,6 @@ define void @lshift_mem_b(i64 %b) nounwi
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_b:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shlq $10, %rdi
-; BDVER1-NEXT: shrq $54, %rax
-; BDVER1-NEXT: orq %rdi, %rax
-; BDVER1-NEXT: movq %rax, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%a = load i64, i64* @x
%shl = shl i64 %b, 10
@@ -446,19 +447,19 @@ define void @lshift_mem_b_optsize(i64 %b
; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER12-LABEL: lshift_mem_b_optsize:
+; BDVER12: # %bb.0: # %entry
+; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
+; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
+; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: lshift_mem_b_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: shrdq $54, %rdi, %rax # sched: [3:3.00]
; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; BDVER1-LABEL: lshift_mem_b_optsize:
-; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: shrdq $54, %rdi, %rax
-; BDVER1-NEXT: movq %rax, {{.*}}(%rip)
-; BDVER1-NEXT: retq
entry:
%a = load i64, i64* @x
%shl = shl i64 %b, 10
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -76,6 +77,14 @@ define i8 @test_aaa(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aaa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aaa # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aaa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@@ -168,6 +177,15 @@ define void @test_aad(i16 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aad:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aad # sched: [100:0.33]
+; BDVER2-NEXT: aad $16 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aad:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@@ -262,6 +280,15 @@ define void @test_aam(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aam:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aam # sched: [100:0.33]
+; BDVER2-NEXT: aam $16 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aam:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@@ -348,6 +375,14 @@ define i8 @test_aas(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_aas:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: aas # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_aas:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@@ -440,6 +475,15 @@ define void @test_arpl(i16 %a0, i16 *%a1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_arpl:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_arpl:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@@ -598,6 +642,23 @@ define void @test_bound(i16 %a0, i16 *%a
; SKX-NEXT: .cfi_def_cfa_offset 4
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_bound:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: pushl %esi # sched: [5:1.00]
+; BDVER2-NEXT: .cfi_def_cfa_offset 8
+; BDVER2-NEXT: .cfi_offset %esi, -8
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.33]
+; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: popl %esi # sched: [6:0.50]
+; BDVER2-NEXT: .cfi_def_cfa_offset 4
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_bound:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pushl %esi # sched: [1:1.00]
@@ -702,6 +763,14 @@ define i8 @test_daa(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_daa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: daa # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_daa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@@ -786,6 +855,14 @@ define i8 @test_das(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_das:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: das # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_das:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@@ -886,6 +963,16 @@ define void @test_dec16(i16 %a0, i16* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_dec16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decw %ax # sched: [1:0.33]
+; BDVER2-NEXT: decw (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_dec16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@@ -989,6 +1076,16 @@ define void @test_dec32(i32 %a0, i32* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_dec32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decl %eax # sched: [1:0.33]
+; BDVER2-NEXT: decl (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_dec32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1093,6 +1190,16 @@ define void @test_inc16(i16 %a0, i16* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_inc16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incw %ax # sched: [1:0.33]
+; BDVER2-NEXT: incw (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_inc16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@@ -1196,6 +1303,16 @@ define void @test_inc32(i32 %a0, i32* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_inc32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incl %eax # sched: [1:0.33]
+; BDVER2-NEXT: incl (%ecx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_inc32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1276,6 +1393,13 @@ define void @test_into() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_into:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: into # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_into:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1368,6 +1492,15 @@ define void @test_jcxz_jecxz() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_jcxz_jecxz:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: JXTGT:
+; BDVER2-NEXT: jcxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: jecxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_jcxz_jecxz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1448,6 +1581,13 @@ define void @test_leave() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_leave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: leave # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_leave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1604,6 +1744,23 @@ define void @test_pop_push() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_pop_push:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popl %ds # sched: [100:0.33]
+; BDVER2-NEXT: popl %es # sched: [100:0.33]
+; BDVER2-NEXT: popl %ss # sched: [100:0.33]
+; BDVER2-NEXT: popl %fs # sched: [100:0.33]
+; BDVER2-NEXT: popl %gs # sched: [100:0.33]
+; BDVER2-NEXT: pushl %cs # sched: [100:0.33]
+; BDVER2-NEXT: pushl %ds # sched: [100:0.33]
+; BDVER2-NEXT: pushl %es # sched: [100:0.33]
+; BDVER2-NEXT: pushl %ss # sched: [100:0.33]
+; BDVER2-NEXT: pushl %fs # sched: [100:0.33]
+; BDVER2-NEXT: pushl %gs # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_pop_push:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1760,6 +1917,21 @@ define i16 @test_pop_push_16(i16 %a0, i1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_pop_push_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popw %ax # sched: [6:0.50]
+; BDVER2-NEXT: popw (%ecx) # sched: [6:0.50]
+; BDVER2-NEXT: pushw %ax # sched: [5:1.00]
+; BDVER2-NEXT: pushw (%ecx) # sched: [5:1.00]
+; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_pop_push_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@@ -1912,6 +2084,21 @@ define i32 @test_pop_push_32(i32 %a0, i3
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_pop_push_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popl %eax # sched: [6:0.50]
+; BDVER2-NEXT: popl (%ecx) # sched: [6:0.50]
+; BDVER2-NEXT: pushl %eax # sched: [5:1.00]
+; BDVER2-NEXT: pushl (%ecx) # sched: [5:1.00]
+; BDVER2-NEXT: pushl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushl $7 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_pop_push_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -2026,6 +2213,16 @@ define void @test_popa_popf_pusha_pushf(
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_popa_popf_pusha_pushf:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popal # sched: [5:0.50]
+; BDVER2-NEXT: popfl # sched: [5:0.50]
+; BDVER2-NEXT: pushal # sched: [1:1.00]
+; BDVER2-NEXT: pushfl # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_popa_popf_pusha_pushf:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2144,6 +2341,18 @@ define void @test_ret() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ret:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+; BDVER2-NEXT: retl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: lretl # sched: [6:1.00]
+; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ret:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2228,6 +2437,13 @@ define i8 @test_salc() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_salc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: salc # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_salc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2345,6 +2561,18 @@ define void @test_xchg_32(i32 %a0, i32 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_xchg_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgl %eax, %eax # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %ecx, %eax # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_xchg_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -108,6 +109,18 @@ define void @test_adc_8(i8 %a0, i8* %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcb $7, %al # sched: [2:0.67]
+; BDVER2-NEXT: adcb $7, %dil # sched: [2:0.67]
+; BDVER2-NEXT: adcb $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcb %dl, %dil # sched: [2:0.67]
+; BDVER2-NEXT: adcb %dil, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcb (%rsi), %dil # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -271,6 +284,23 @@ define void @test_adc_16(i16 %a0, i16* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: adcw $7, %di # sched: [2:0.67]
+; BDVER2-NEXT: adcw $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcw %dx, %di # sched: [2:0.67]
+; BDVER2-NEXT: adcw %di, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcw (%rsi), %di # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -444,6 +474,23 @@ define void @test_adc_32(i32 %a0, i32* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: adcl $7, %edi # sched: [2:0.67]
+; BDVER2-NEXT: adcl $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcl %edx, %edi # sched: [2:0.67]
+; BDVER2-NEXT: adcl %edi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcl (%rsi), %edi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -617,6 +664,23 @@ define void @test_adc_64(i64 %a0, i64* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_adc_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: adcq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: adcq $7, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: adcq $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcq %rdx, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: adcq (%rsi), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_adc_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -751,6 +815,18 @@ define void @test_add_8(i8 %a0, i8* %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: addb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: addb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -914,6 +990,23 @@ define void @test_add_16(i16 %a0, i16* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: addw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: addw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1087,6 +1180,23 @@ define void @test_add_32(i32 %a0, i32* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: addl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1260,6 +1370,23 @@ define void @test_add_64(i64 %a0, i64* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_add_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: addq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: addq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_add_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1394,6 +1521,18 @@ define void @test_and_8(i8 %a0, i8* %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: andb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: andb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1557,6 +1696,23 @@ define void @test_and_16(i16 %a0, i16* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: andw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: andw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: andw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1730,6 +1886,23 @@ define void @test_and_32(i32 %a0, i32* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: andl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: andl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1903,6 +2076,23 @@ define void @test_and_64(i64 %a0, i64* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_and_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: andq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: andq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: andq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_and_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2021,6 +2211,16 @@ define i16 @test_bsf16(i16 %a0, i16* %a1
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsf16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsfw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsf16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2119,6 +2319,15 @@ define i32 @test_bsf32(i32 %a0, i32* %a1
; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsf32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsfl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsf32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2215,6 +2424,15 @@ define i64 @test_bsf64(i64 %a0, i64* %a1
; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsf64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsf64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2320,6 +2538,16 @@ define i16 @test_bsr16(i16 %a0, i16* %a1
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsr16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsrw %di, %ax # sched: [3:1.00]
+; BDVER2-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsr16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2418,6 +2646,15 @@ define i32 @test_bsr32(i32 %a0, i32* %a1
; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsr32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsrl %edi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsr32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2514,6 +2751,15 @@ define i64 @test_bsr64(i64 %a0, i64* %a1
; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bsr64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bsr64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2587,6 +2833,12 @@ define i32 @test_bswap32(i32 %a0) optsiz
; SKX-NEXT: bswapl %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bswap32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: bswapl %eax # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bswap32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
@@ -2650,6 +2902,12 @@ define i64 @test_bswap64(i64 %a0) optsiz
; SKX-NEXT: bswapq %rax # sched: [2:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bswap64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: bswapq %rax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bswap64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
@@ -2842,6 +3100,28 @@ define void @test_bt_btc_btr_bts_16(i16
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bt_btc_btr_bts_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: btw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btcw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btrw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btsw %si, %di # sched: [1:0.50]
+; BDVER2-NEXT: btw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btcw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btrw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btsw %si, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btcw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btrw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btsw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: btw $7, (%rdx) # sched: [6:0.50]
+; BDVER2-NEXT: btcw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btrw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btsw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bt_btc_btr_bts_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3065,6 +3345,28 @@ define void @test_bt_btc_btr_bts_32(i32
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bt_btc_btr_bts_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: btl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btcl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btrl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btsl %esi, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btcl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btrl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btsl %esi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btcl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btrl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btsl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: btl $7, (%rdx) # sched: [6:0.50]
+; BDVER2-NEXT: btcl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btrl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btsl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bt_btc_btr_bts_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3288,6 +3590,28 @@ define void @test_bt_btc_btr_bts_64(i64
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_bt_btc_btr_bts_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: btq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btcq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btrq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btsq %rsi, (%rdx) # sched: [9:1.00]
+; BDVER2-NEXT: btq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btcq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btrq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btsq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: btq $7, (%rdx) # sched: [6:0.50]
+; BDVER2-NEXT: btcq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btrq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: btsq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_bt_btc_btr_bts_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3434,6 +3758,18 @@ define void @test_cbw_cdq_cdqe_cqo_cwd_c
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cbtw # sched: [1:0.33]
+; BDVER2-NEXT: cltd # sched: [1:0.50]
+; BDVER2-NEXT: cltq # sched: [1:0.33]
+; BDVER2-NEXT: cqto # sched: [1:0.50]
+; BDVER2-NEXT: cwtd # sched: [2:1.00]
+; BDVER2-NEXT: cwtl # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3534,6 +3870,15 @@ define void @test_clc_cld_cmc() optsize
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_clc_cld_cmc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: clc # sched: [1:0.25]
+; BDVER2-NEXT: cld # sched: [1:0.33]
+; BDVER2-NEXT: cmc # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_clc_cld_cmc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3652,6 +3997,18 @@ define void @test_cmp_8(i8 %a0, i8* %a1)
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: cmpb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpb %dil, %dil # sched: [1:0.33]
+; BDVER2-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3815,6 +4172,23 @@ define void @test_cmp_16(i16 %a0, i16* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: cmpw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpw %di, %di # sched: [1:0.33]
+; BDVER2-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3988,6 +4362,23 @@ define void @test_cmp_32(i32 %a0, i32* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: cmpl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpl %edi, %edi # sched: [1:0.33]
+; BDVER2-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4161,6 +4552,23 @@ define void @test_cmp_64(i64 %a0, i64* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmp_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: cmpq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmp_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4279,6 +4687,16 @@ define void @test_cmps() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4367,6 +4785,14 @@ define void @test_cmpxchg_8(i8 %a0, i8 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4450,6 +4876,14 @@ define void @test_cmpxchg_16(i16 %a0, i1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgw %di, %si # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4533,6 +4967,14 @@ define void @test_cmpxchg_32(i32 %a0, i3
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4616,6 +5058,14 @@ define void @test_cmpxchg_64(i64 %a0, i6
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33]
+; BDVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4699,6 +5149,14 @@ define void @test_cmpxchg8b_cmpxchg16b(i
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cmpxchg8b_cmpxchg16b:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00]
+; BDVER2-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cmpxchg8b_cmpxchg16b:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4775,6 +5233,13 @@ define void @test_cpuid() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_cpuid:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: cpuid # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_cpuid:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4857,6 +5322,14 @@ define void @test_dec8(i8 %a0, i8* %a1)
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decb %dil # sched: [1:0.33]
+; BDVER2-NEXT: decb (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4940,6 +5413,14 @@ define void @test_dec16(i16 %a0, i16* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decw %di # sched: [1:0.33]
+; BDVER2-NEXT: decw (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5023,6 +5504,14 @@ define void @test_dec32(i32 %a0, i32* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decl %edi # sched: [1:0.33]
+; BDVER2-NEXT: decl (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5106,6 +5595,14 @@ define void @test_dec64(i64 %a0, i64* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_dec64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: decq %rdi # sched: [1:0.33]
+; BDVER2-NEXT: decq (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_dec64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5254,6 +5751,22 @@ define void @test_div(i8 %a0, i16 %a1, i
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_div:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: divb %dil # sched: [25:10.00]
+; BDVER2-NEXT: divb (%r8) # sched: [30:10.00]
+; BDVER2-NEXT: divw %si # sched: [25:10.00]
+; BDVER2-NEXT: divw (%r9) # sched: [30:10.00]
+; BDVER2-NEXT: divl %edx # sched: [25:10.00]
+; BDVER2-NEXT: divl (%rax) # sched: [30:10.00]
+; BDVER2-NEXT: divq %rcx # sched: [25:10.00]
+; BDVER2-NEXT: divq (%r10) # sched: [30:10.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_div:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
@@ -5354,6 +5867,14 @@ define void @test_enter() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_enter:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: enter $7, $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_enter:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5502,6 +6023,22 @@ define void @test_idiv(i8 %a0, i16 %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_idiv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: idivb %dil # sched: [25:10.00]
+; BDVER2-NEXT: idivb (%r8) # sched: [30:10.00]
+; BDVER2-NEXT: idivw %si # sched: [25:10.00]
+; BDVER2-NEXT: idivw (%r9) # sched: [30:10.00]
+; BDVER2-NEXT: idivl %edx # sched: [25:10.00]
+; BDVER2-NEXT: idivl (%rax) # sched: [30:10.00]
+; BDVER2-NEXT: idivq %rcx # sched: [25:10.00]
+; BDVER2-NEXT: idivq (%r10) # sched: [30:10.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_idiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
@@ -5602,6 +6139,14 @@ define void @test_imul_8(i8 %a0, i8* %a1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imulb %dil # sched: [3:1.00]
+; BDVER2-NEXT: imulb (%rsi) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5749,6 +6294,22 @@ define void @test_imul_16(i16 %a0, i16*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imulw %di # sched: [4:1.33]
+; BDVER2-NEXT: imulw (%rsi) # sched: [9:1.33]
+; BDVER2-NEXT: imulw %dx, %di # sched: [3:1.00]
+; BDVER2-NEXT: imulw (%rsi), %di # sched: [8:1.00]
+; BDVER2-NEXT: imulw $511, %di, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [4:1.00]
+; BDVER2-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [8:1.00]
+; BDVER2-NEXT: imulw $7, %di, %di # sched: [4:1.00]
+; BDVER2-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5912,6 +6473,22 @@ define void @test_imul_32(i32 %a0, i32*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imull %edi # sched: [4:1.00]
+; BDVER2-NEXT: imull (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: imull %edx, %edi # sched: [3:1.00]
+; BDVER2-NEXT: imull (%rsi), %edi # sched: [8:1.00]
+; BDVER2-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [3:1.00]
+; BDVER2-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [8:1.00]
+; BDVER2-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
+; BDVER2-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6075,6 +6652,22 @@ define void @test_imul_64(i64 %a0, i64*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_imul_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: imulq %rdi # sched: [4:1.00]
+; BDVER2-NEXT: imulq (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [3:1.00]
+; BDVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [8:1.00]
+; BDVER2-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
+; BDVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_imul_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6207,6 +6800,18 @@ define void @test_in() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_in:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: inb $7, %al # sched: [100:0.33]
+; BDVER2-NEXT: inw $7, %ax # sched: [100:0.33]
+; BDVER2-NEXT: inl $7, %eax # sched: [100:0.33]
+; BDVER2-NEXT: inb %dx, %al # sched: [100:0.33]
+; BDVER2-NEXT: inw %dx, %ax # sched: [100:0.33]
+; BDVER2-NEXT: inl %dx, %eax # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_in:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6299,6 +6904,14 @@ define void @test_inc8(i8 %a0, i8* %a1)
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incb %dil # sched: [1:0.33]
+; BDVER2-NEXT: incb (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6382,6 +6995,14 @@ define void @test_inc16(i16 %a0, i16* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incw %di # sched: [1:0.33]
+; BDVER2-NEXT: incw (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6465,6 +7086,14 @@ define void @test_inc32(i32 %a0, i32* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incl %edi # sched: [1:0.33]
+; BDVER2-NEXT: incl (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6548,6 +7177,14 @@ define void @test_inc64(i64 %a0, i64* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_inc64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: incq %rdi # sched: [1:0.33]
+; BDVER2-NEXT: incq (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_inc64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6640,6 +7277,15 @@ define void @test_ins() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ins:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ins:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6718,6 +7364,13 @@ define void @test_int() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_int:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: int $7 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_int:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -6800,6 +7453,14 @@ define void @test_invlpg_invlpga(i8 *%a0
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_invlpg_invlpga:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: invlpg (%rdi) # sched: [100:0.33]
+; BDVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_invlpg_invlpga:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7116,6 +7777,43 @@ define void @test_jcc() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_jcc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: JCCTGT:
+; BDVER2-NEXT: jo JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jno JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: je JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: je JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: js JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jns JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_jcc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7266,6 +7964,15 @@ define void @test_jecxz_jrcxz() optsize
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_jecxz_jrcxz:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: JXTGT:
+; BDVER2-NEXT: jecxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: jrcxz JXTGT # sched: [2:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_jecxz_jrcxz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7354,6 +8061,14 @@ define void @test_lahf_sahf() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lahf_sahf:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: lahf # sched: [1:0.50]
+; BDVER2-NEXT: sahf # sched: [1:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lahf_sahf:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7438,6 +8153,13 @@ define void @test_leave() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_leave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: leave # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_leave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7536,6 +8258,16 @@ define void @test_lods() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_lods:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: lodsb (%rsi), %al # sched: [7:0.67]
+; BDVER2-NEXT: lodsw (%rsi), %ax # sched: [7:0.67]
+; BDVER2-NEXT: lodsl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: lodsq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_lods:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7640,6 +8372,16 @@ define void @test_loop() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_loop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: LTGT:
+; BDVER2-NEXT: loop LTGT # sched: [1:1.00]
+; BDVER2-NEXT: loope LTGT # sched: [1:1.00]
+; BDVER2-NEXT: loopne LTGT # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_loop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7730,6 +8472,14 @@ define void @test_movnti(i32 %a0, i32 *%
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movnti:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movnti:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7830,6 +8580,16 @@ define void @test_movs() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -7929,6 +8689,15 @@ define i64 @test_movslq(i32 %a0, i32 *%a
; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_movslq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: movslq %edi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movslq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -8082,6 +8851,22 @@ define void @test_mul(i8 %a0, i16 %a1, i
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_mul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: mulb %dil # sched: [3:1.00]
+; BDVER2-NEXT: mulb (%r8) # sched: [8:1.00]
+; BDVER2-NEXT: mulw %si # sched: [4:1.33]
+; BDVER2-NEXT: mulw (%r9) # sched: [9:1.33]
+; BDVER2-NEXT: mull %edx # sched: [4:1.00]
+; BDVER2-NEXT: mull (%rax) # sched: [9:1.00]
+; BDVER2-NEXT: mulq %rcx # sched: [4:1.00]
+; BDVER2-NEXT: mulq (%r10) # sched: [9:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
@@ -8246,6 +9031,22 @@ define void @test_neg(i8 %a0, i16 %a1, i
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_neg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: negb %dil # sched: [1:0.33]
+; BDVER2-NEXT: negb (%r8) # sched: [7:1.00]
+; BDVER2-NEXT: negw %si # sched: [1:0.33]
+; BDVER2-NEXT: negw (%r9) # sched: [7:1.00]
+; BDVER2-NEXT: negl %edx # sched: [1:0.33]
+; BDVER2-NEXT: negl (%rax) # sched: [7:1.00]
+; BDVER2-NEXT: negq %rcx # sched: [1:0.33]
+; BDVER2-NEXT: negq (%r10) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_neg:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
@@ -8386,6 +9187,19 @@ define void @test_nop(i16 %a0, i32 %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_nop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: nop # sched: [1:0.25]
+; BDVER2-NEXT: nopw %di # sched: [1:0.25]
+; BDVER2-NEXT: nopw (%rcx) # sched: [1:0.25]
+; BDVER2-NEXT: nopl %esi # sched: [1:0.25]
+; BDVER2-NEXT: nopl (%r8) # sched: [1:0.25]
+; BDVER2-NEXT: nopq %rdx # sched: [1:0.25]
+; BDVER2-NEXT: nopq (%r9) # sched: [1:0.25]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_nop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -8544,6 +9358,22 @@ define void @test_not(i8 %a0, i16 %a1, i
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_not:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
+; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: notb %dil # sched: [1:0.33]
+; BDVER2-NEXT: notb (%r8) # sched: [7:1.00]
+; BDVER2-NEXT: notw %si # sched: [1:0.33]
+; BDVER2-NEXT: notw (%r9) # sched: [7:1.00]
+; BDVER2-NEXT: notl %edx # sched: [1:0.33]
+; BDVER2-NEXT: notl (%rax) # sched: [7:1.00]
+; BDVER2-NEXT: notq %rcx # sched: [1:0.33]
+; BDVER2-NEXT: notq (%r10) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_not:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:1.00]
@@ -8676,6 +9506,18 @@ define void @test_or_8(i8 %a0, i8* %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: orb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: orb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -8839,6 +9681,23 @@ define void @test_or_16(i16 %a0, i16* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: orw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: orw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: orw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9012,6 +9871,23 @@ define void @test_or_32(i32 %a0, i32* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: orl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: orl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9185,6 +10061,23 @@ define void @test_or_64(i64 %a0, i64* %a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_or_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: orq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: orq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: orq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_or_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9319,6 +10212,18 @@ define void @test_out() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_out:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: outb %al, $7 # sched: [100:0.33]
+; BDVER2-NEXT: outw %ax, $7 # sched: [100:0.33]
+; BDVER2-NEXT: outl %eax, $7 # sched: [100:0.33]
+; BDVER2-NEXT: outb %al, %dx # sched: [100:0.33]
+; BDVER2-NEXT: outw %ax, %dx # sched: [100:0.33]
+; BDVER2-NEXT: outl %eax, %dx # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_out:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9419,6 +10324,15 @@ define void @test_outs() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_outs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.33]
+; BDVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.33]
+; BDVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_outs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9497,6 +10411,13 @@ define void @test_pause() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pause:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: pause # sched: [4:1.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pause:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9595,6 +10516,16 @@ define void @test_pop_push() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pop_push:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popq %fs # sched: [100:0.33]
+; BDVER2-NEXT: popq %gs # sched: [100:0.33]
+; BDVER2-NEXT: pushq %fs # sched: [3:1.00]
+; BDVER2-NEXT: pushq %gs # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pop_push:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9722,6 +10653,19 @@ define i16 @test_pop_push_16(i16 %a0, i1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pop_push_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popw %ax # sched: [6:0.50]
+; BDVER2-NEXT: popw (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: pushw %di # sched: [5:1.00]
+; BDVER2-NEXT: pushw (%rsi) # sched: [5:1.00]
+; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pop_push_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9855,6 +10799,19 @@ define i64 @test_pop_push_64(i64 %a0, i6
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_pop_push_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popq %rax # sched: [6:0.50]
+; BDVER2-NEXT: popq (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: pushq %rdi # sched: [5:1.00]
+; BDVER2-NEXT: pushq (%rsi) # sched: [5:1.00]
+; BDVER2-NEXT: pushq $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [1:1.00]
+; BDVER2-NEXT: pushq $7 # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_pop_push_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -9949,6 +10906,14 @@ define void @test_popf_pushf() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_popf_pushf:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: popfq # sched: [5:0.50]
+; BDVER2-NEXT: pushfq # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_popf_pushf:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -10113,6 +11078,24 @@ define void @test_rcl_rcr_8(i8 %a0, i8 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rclb %dil # sched: [2:1.50]
+; BDVER2-NEXT: rcrb %dil # sched: [2:1.50]
+; BDVER2-NEXT: rclb (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrb (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclb $7, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rcrb $7, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rclb $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrb $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclb %cl, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rcrb %cl, %dil # sched: [5:4.00]
+; BDVER2-NEXT: rclb %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrb %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -10296,6 +11279,24 @@ define void @test_rcl_rcr_16(i16 %a0, i1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rclw %di # sched: [2:1.50]
+; BDVER2-NEXT: rcrw %di # sched: [2:1.50]
+; BDVER2-NEXT: rclw (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrw (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclw $7, %di # sched: [5:4.00]
+; BDVER2-NEXT: rcrw $7, %di # sched: [5:4.00]
+; BDVER2-NEXT: rclw $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrw $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclw %cl, %di # sched: [5:4.00]
+; BDVER2-NEXT: rcrw %cl, %di # sched: [5:4.00]
+; BDVER2-NEXT: rclw %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrw %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -10479,6 +11480,24 @@ define void @test_rcl_rcr_32(i32 %a0, i3
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rcll %edi # sched: [2:1.50]
+; BDVER2-NEXT: rcrl %edi # sched: [2:1.50]
+; BDVER2-NEXT: rcll (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrl (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcll $7, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcrl $7, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcll $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrl $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcll %cl, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcrl %cl, %edi # sched: [5:4.00]
+; BDVER2-NEXT: rcll %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrl %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -10662,6 +11681,24 @@ define void @test_rcl_rcr_64(i64 %a0, i6
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rcl_rcr_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rclq %rdi # sched: [2:1.50]
+; BDVER2-NEXT: rcrq %rdi # sched: [2:1.50]
+; BDVER2-NEXT: rclq (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrq (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclq $7, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rcrq $7, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rclq $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrq $7, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rclq %cl, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rcrq %cl, %rdi # sched: [5:4.00]
+; BDVER2-NEXT: rclq %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: rcrq %cl, (%rdx) # sched: [11:3.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rcl_rcr_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -10766,6 +11803,14 @@ define void @test_rdmsr_wrmsr() optsize
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rdmsr_wrmsr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rdmsr # sched: [100:0.33]
+; BDVER2-NEXT: wrmsr # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rdmsr_wrmsr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -10842,6 +11887,13 @@ define void @test_rdpmc() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rdpmc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rdpmc # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rdpmc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -10924,6 +11976,14 @@ define void @test_rdtsc_rdtscp() optsize
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rdtsc_rdtscp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rdtsc # sched: [100:0.33]
+; BDVER2-NEXT: rdtscp # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rdtsc_rdtscp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -11040,6 +12100,18 @@ define void @test_ret() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ret:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+; BDVER2-NEXT: retq $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: lretl # sched: [6:1.00]
+; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
+; BDVER2-NEXT: # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ret:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -11212,6 +12284,24 @@ define void @test_rol_ror_8(i8 %a0, i8 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rolb %dil # sched: [2:1.00]
+; BDVER2-NEXT: rorb %dil # sched: [2:1.00]
+; BDVER2-NEXT: rolb (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorb (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolb $7, %dil # sched: [2:1.00]
+; BDVER2-NEXT: rorb $7, %dil # sched: [2:1.00]
+; BDVER2-NEXT: rolb $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorb $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: rorb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: rolb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -11395,6 +12485,24 @@ define void @test_rol_ror_16(i16 %a0, i1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rolw %di # sched: [2:1.00]
+; BDVER2-NEXT: rorw %di # sched: [2:1.00]
+; BDVER2-NEXT: rolw (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorw (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolw $7, %di # sched: [2:1.00]
+; BDVER2-NEXT: rorw $7, %di # sched: [2:1.00]
+; BDVER2-NEXT: rolw $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorw $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: rorw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: rolw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -11578,6 +12686,24 @@ define void @test_rol_ror_32(i32 %a0, i3
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: roll %edi # sched: [2:1.00]
+; BDVER2-NEXT: rorl %edi # sched: [2:1.00]
+; BDVER2-NEXT: roll (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorl (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: roll $7, %edi # sched: [2:1.00]
+; BDVER2-NEXT: rorl $7, %edi # sched: [2:1.00]
+; BDVER2-NEXT: roll $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorl $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: roll %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: rorl %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: roll %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorl %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -11761,6 +12887,24 @@ define void @test_rol_ror_64(i64 %a0, i6
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_rol_ror_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: rolq %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rorq %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rolq (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorq (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolq $7, %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rorq $7, %rdi # sched: [2:1.00]
+; BDVER2-NEXT: rolq $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rorq $7, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: rolq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: rorq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: rolq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: rorq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_rol_ror_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -11993,6 +13137,30 @@ define void @test_sar_shl_shr_8(i8 %a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarb %dil # sched: [1:0.50]
+; BDVER2-NEXT: shlb %dil # sched: [1:0.50]
+; BDVER2-NEXT: shrb %dil # sched: [1:0.50]
+; BDVER2-NEXT: sarb (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlb (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrb (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarb $7, %dil # sched: [1:0.50]
+; BDVER2-NEXT: shlb $7, %dil # sched: [1:0.50]
+; BDVER2-NEXT: shrb $7, %dil # sched: [1:0.50]
+; BDVER2-NEXT: sarb $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlb $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrb $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: shlb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: shrb %cl, %dil # sched: [3:1.50]
+; BDVER2-NEXT: sarb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shlb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrb %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -12236,6 +13404,30 @@ define void @test_sar_shl_shr_16(i16 %a0
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarw %di # sched: [1:0.50]
+; BDVER2-NEXT: shlw %di # sched: [1:0.50]
+; BDVER2-NEXT: shrw %di # sched: [1:0.50]
+; BDVER2-NEXT: sarw (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlw (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrw (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: shlw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: shrw $7, %di # sched: [1:0.50]
+; BDVER2-NEXT: sarw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrw $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: shlw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: shrw %cl, %di # sched: [3:1.50]
+; BDVER2-NEXT: sarw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shlw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrw %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -12479,6 +13671,30 @@ define void @test_sar_shl_shr_32(i32 %a0
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarl %edi # sched: [1:0.50]
+; BDVER2-NEXT: shll %edi # sched: [1:0.50]
+; BDVER2-NEXT: shrl %edi # sched: [1:0.50]
+; BDVER2-NEXT: sarl (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shll (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrl (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: shll $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: shrl $7, %edi # sched: [1:0.50]
+; BDVER2-NEXT: sarl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shll $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrl $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarl %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: shll %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: shrl %cl, %edi # sched: [3:1.50]
+; BDVER2-NEXT: sarl %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shll %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrl %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -12722,6 +13938,30 @@ define void @test_sar_shl_shr_64(i64 %a0
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sar_shl_shr_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sarq %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shlq %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shrq %rdi # sched: [1:0.50]
+; BDVER2-NEXT: sarq (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlq (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrq (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shlq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: shrq $7, %rdi # sched: [1:0.50]
+; BDVER2-NEXT: sarq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shlq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: shrq $7, (%rdx) # sched: [7:1.00]
+; BDVER2-NEXT: sarq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: shlq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: shrq %cl, %rdi # sched: [3:1.50]
+; BDVER2-NEXT: sarq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shlq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: shrq %cl, (%rdx) # sched: [9:1.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sar_shl_shr_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -12870,6 +14110,18 @@ define void @test_sbb_8(i8 %a0, i8* %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbb $7, %al # sched: [2:0.67]
+; BDVER2-NEXT: sbbb $7, %dil # sched: [2:0.67]
+; BDVER2-NEXT: sbbb $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbb %dl, %dil # sched: [2:0.67]
+; BDVER2-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbb (%rsi), %dil # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -13033,6 +14285,23 @@ define void @test_sbb_16(i16 %a0, i16* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: sbbw $7, %di # sched: [2:0.67]
+; BDVER2-NEXT: sbbw $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbw %dx, %di # sched: [2:0.67]
+; BDVER2-NEXT: sbbw %di, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbw (%rsi), %di # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -13206,6 +14475,23 @@ define void @test_sbb_32(i32 %a0, i32* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: sbbl $7, %edi # sched: [2:0.67]
+; BDVER2-NEXT: sbbl $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbl %edx, %edi # sched: [2:0.67]
+; BDVER2-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbl (%rsi), %edi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -13379,6 +14665,23 @@ define void @test_sbb_64(i64 %a0, i64* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sbb_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: sbbq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [2:0.67]
+; BDVER2-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [9:1.00]
+; BDVER2-NEXT: sbbq $7, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: sbbq $7, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbq %rdx, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00]
+; BDVER2-NEXT: sbbq (%rsi), %rdi # sched: [7:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sbb_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -13497,6 +14800,16 @@ define void @test_scas() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_scas:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: scasb %es:(%rdi), %al # sched: [2:0.67]
+; BDVER2-NEXT: scasw %es:(%rdi), %ax # sched: [2:0.67]
+; BDVER2-NEXT: scasl %es:(%rdi), %eax # sched: [2:0.67]
+; BDVER2-NEXT: scasq %es:(%rdi), %rax # sched: [2:0.67]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_scas:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -13825,6 +15138,44 @@ define void @test_setcc(i8 %a0, i8 *%a1)
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_setcc:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: seto %dil # sched: [1:0.50]
+; BDVER2-NEXT: setno %dil # sched: [1:0.50]
+; BDVER2-NEXT: setb %dil # sched: [1:0.50]
+; BDVER2-NEXT: setae %dil # sched: [1:0.50]
+; BDVER2-NEXT: sete %dil # sched: [1:0.50]
+; BDVER2-NEXT: setne %dil # sched: [1:0.50]
+; BDVER2-NEXT: setbe %dil # sched: [2:1.00]
+; BDVER2-NEXT: seta %dil # sched: [2:1.00]
+; BDVER2-NEXT: sets %dil # sched: [1:0.50]
+; BDVER2-NEXT: setns %dil # sched: [1:0.50]
+; BDVER2-NEXT: setp %dil # sched: [1:0.50]
+; BDVER2-NEXT: setnp %dil # sched: [1:0.50]
+; BDVER2-NEXT: setl %dil # sched: [1:0.50]
+; BDVER2-NEXT: setge %dil # sched: [1:0.50]
+; BDVER2-NEXT: setle %dil # sched: [1:0.50]
+; BDVER2-NEXT: setg %dil # sched: [1:0.50]
+; BDVER2-NEXT: seto (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setno (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setb (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setae (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: sete (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setne (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setbe (%rsi) # sched: [3:1.00]
+; BDVER2-NEXT: seta (%rsi) # sched: [3:1.00]
+; BDVER2-NEXT: sets (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setns (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setp (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setnp (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setl (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setge (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setle (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: setg (%rsi) # sched: [2:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_setcc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14019,6 +15370,20 @@ define void @test_shld_shrd_16(i16 %a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shld_shrd_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: shldw %cl, %si, %di # sched: [4:1.50]
+; BDVER2-NEXT: shrdw %cl, %si, %di # sched: [4:1.50]
+; BDVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shldw $7, %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: shrdw $7, %si, %di # sched: [2:0.67]
+; BDVER2-NEXT: shldw $7, %si, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shld_shrd_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14162,6 +15527,20 @@ define void @test_shld_shrd_32(i32 %a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shld_shrd_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:1.50]
+; BDVER2-NEXT: shrdl %cl, %esi, %edi # sched: [4:1.50]
+; BDVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shldl $7, %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: shrdl $7, %esi, %edi # sched: [2:0.67]
+; BDVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shld_shrd_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14305,6 +15684,20 @@ define void @test_shld_shrd_64(i64 %a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_shld_shrd_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50]
+; BDVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50]
+; BDVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
+; BDVER2-NEXT: shldq $7, %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [2:0.67]
+; BDVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_shld_shrd_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14405,6 +15798,14 @@ define void @test_stc_std() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_stc_std:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: stc # sched: [1:0.33]
+; BDVER2-NEXT: std # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_stc_std:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14508,6 +15909,16 @@ define void @test_stos() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_stos:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: stosb %al, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_stos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14630,6 +16041,18 @@ define void @test_sub_8(i8 %a0, i8* %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: subb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: subb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14793,6 +16216,23 @@ define void @test_sub_16(i16 %a0, i16* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: subw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: subw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: subw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -14966,6 +16406,23 @@ define void @test_sub_32(i32 %a0, i32* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: subl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: subl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15139,6 +16596,23 @@ define void @test_sub_64(i64 %a0, i64* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_sub_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: subq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: subq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: subq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_sub_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15271,6 +16745,17 @@ define void @test_test_8(i8 %a0, i8* %a1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: testb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: testb $7, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: testb %dil, %dil # sched: [1:0.33]
+; BDVER2-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15408,6 +16893,20 @@ define void @test_test_16(i16 %a0, i16*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: testw %di, %di # sched: [1:0.33]
+; BDVER2-NEXT: testw %di, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15551,6 +17050,20 @@ define void @test_test_32(i32 %a0, i32*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: testl %edi, %edi # sched: [1:0.33]
+; BDVER2-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15694,6 +17207,20 @@ define void @test_test_64(i64 %a0, i64*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_test_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: testq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [6:0.50]
+; BDVER2-NEXT: testq %rdi, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_test_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15783,6 +17310,13 @@ define void @test_ud2() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_ud2:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ud2 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_ud2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15873,6 +17407,14 @@ define void @test_xadd_8(i8 %a0, i8 %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddb %dil, %sil # sched: [2:1.00]
+; BDVER2-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -15956,6 +17498,14 @@ define void @test_xadd_16(i16 %a0, i16 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddw %di, %si # sched: [2:1.00]
+; BDVER2-NEXT: xaddw %di, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16039,6 +17589,14 @@ define void @test_xadd_32(i32 %a0, i32 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddl %edi, %esi # sched: [2:1.00]
+; BDVER2-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16122,6 +17680,14 @@ define void @test_xadd_64(i64 %a0, i64 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xadd_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xaddq %rdi, %rsi # sched: [2:1.00]
+; BDVER2-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xadd_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16206,6 +17772,14 @@ define void @test_xchg_8(i8 %a0, i8 %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgb %sil, %dil # sched: [2:1.00]
+; BDVER2-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16297,6 +17871,15 @@ define void @test_xchg_16(i16 %a0, i16 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgw %di, %ax # sched: [2:1.00]
+; BDVER2-NEXT: xchgw %si, %di # sched: [2:1.00]
+; BDVER2-NEXT: xchgw %di, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16390,6 +17973,15 @@ define void @test_xchg_32(i32 %a0, i32 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgl %edi, %eax # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %esi, %edi # sched: [2:1.00]
+; BDVER2-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16483,6 +18075,15 @@ define void @test_xchg_64(i64 %a0, i64 %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xchg_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xchgq %rdi, %rax # sched: [2:1.00]
+; BDVER2-NEXT: xchgq %rsi, %rdi # sched: [2:1.00]
+; BDVER2-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xchg_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16561,6 +18162,13 @@ define void @test_xlat() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xlat:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xlatb # sched: [5:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xlat:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16675,6 +18283,18 @@ define void @test_xor_8(i8 %a0, i8* %a1,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorb $7, %al # sched: [1:0.33]
+; BDVER2-NEXT: xorb $7, %dil # sched: [1:0.33]
+; BDVER2-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorb %dl, %dil # sched: [1:0.33]
+; BDVER2-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_8:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -16838,6 +18458,23 @@ define void @test_xor_16(i16 %a0, i16* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorw $511, %ax # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorw $511, %di # imm = 0x1FF
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorw $511, (%rsi) # imm = 0x1FF
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: xorw $7, %di # sched: [1:0.33]
+; BDVER2-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorw %dx, %di # sched: [1:0.33]
+; BDVER2-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorw (%rsi), %di # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -17011,6 +18648,23 @@ define void @test_xor_32(i32 %a0, i32* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorl $665536, %eax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorl $665536, %edi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: xorl $7, %edi # sched: [1:0.33]
+; BDVER2-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorl %edx, %edi # sched: [1:0.33]
+; BDVER2-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -17184,6 +18838,23 @@ define void @test_xor_64(i64 %a0, i64* %
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-LABEL: test_xor_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: xorq $665536, %rax # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorq $665536, %rdi # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [1:0.33]
+; BDVER2-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: xorq $7, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.33]
+; BDVER2-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
+; BDVER2-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_xor_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
Modified: llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll (original)
+++ llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll Sat Oct 27 13:36:11 2018
@@ -1,25 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s --check-prefix=CORE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 | FileCheck %s --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
define void @copy16bytes(i8* nocapture %a, i8* nocapture readonly %b) {
+; CORE2-LABEL: copy16bytes:
+; CORE2: ## %bb.0:
+; CORE2-NEXT: movq (%rsi), %rax
+; CORE2-NEXT: movq 8(%rsi), %rcx
+; CORE2-NEXT: movq %rcx, 8(%rdi)
+; CORE2-NEXT: movq %rax, (%rdi)
+; CORE2-NEXT: retq
+;
+; NEHALEM-LABEL: copy16bytes:
+; NEHALEM: ## %bb.0:
+; NEHALEM-NEXT: movups (%rsi), %xmm0
+; NEHALEM-NEXT: movups %xmm0, (%rdi)
+; NEHALEM-NEXT: retq
+;
+; BDVER2-LABEL: copy16bytes:
+; BDVER2: ## %bb.0:
+; BDVER2-NEXT: movups (%rsi), %xmm0
+; BDVER2-NEXT: movups %xmm0, (%rdi)
+; BDVER2-NEXT: retq
+;
+; BTVER2-LABEL: copy16bytes:
+; BTVER2: ## %bb.0:
+; BTVER2-NEXT: vmovups (%rsi), %xmm0
+; BTVER2-NEXT: vmovups %xmm0, (%rdi)
+; BTVER2-NEXT: retq
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 false)
ret void
; CHECK-LABEL: copy16bytes
- ; CORE2: movq
- ; CORE2-NEXT: movq
- ; CORE2-NEXT: movq
- ; CORE2-NEXT: movq
- ; CORE2-NEXT: retq
- ; NEHALEM: movups
- ; NEHALEM-NEXT: movups
- ; NEHALEM-NEXT: retq
- ; BTVER2: movups
- ; BTVER2-NEXT: movups
- ; BTVER2-NEXT: retq
}
Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Sat Oct 27 13:36:11 2018
@@ -14,6 +14,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@@ -100,6 +102,18 @@ define <4 x float> @test_addps(<4 x floa
; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
@@ -208,6 +222,18 @@ define float @test_addss(float %a0, floa
; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
@@ -320,6 +346,18 @@ define <4 x float> @test_andps(<4 x floa
; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
@@ -436,6 +474,18 @@ define <4 x float> @test_andnotps(<4 x f
; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andnotps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andnotps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andnotps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
@@ -563,6 +613,20 @@ define <4 x float> @test_cmpps(<4 x floa
; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmpps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
@@ -679,6 +743,18 @@ define float @test_cmpss(float %a0, floa
; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmpss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmpss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmpss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
@@ -896,6 +972,34 @@ define i32 @test_comiss(<4 x float> %a0,
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_comiss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_comiss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_comiss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -1051,6 +1155,20 @@ define float @test_cvtsi2ss(i32 %a0, i32
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2ss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2ss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2ss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [14:1.00]
@@ -1177,6 +1295,20 @@ define float @test_cvtsi2ssq(i64 %a0, i6
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2ssq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2ssq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2ssq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [14:1.00]
@@ -1303,6 +1435,20 @@ define i32 @test_cvtss2si(float %a0, flo
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtss2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtss2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtss2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00]
@@ -1432,6 +1578,20 @@ define i64 @test_cvtss2siq(float %a0, fl
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtss2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtss2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtss2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00]
@@ -1561,6 +1721,20 @@ define i32 @test_cvttss2si(float %a0, fl
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttss2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttss2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttss2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00]
@@ -1687,6 +1861,20 @@ define i64 @test_cvttss2siq(float %a0, f
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttss2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttss2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttss2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
@@ -1800,6 +1988,18 @@ define <4 x float> @test_divps(<4 x floa
; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00]
@@ -1908,6 +2108,18 @@ define float @test_divss(float %a0, floa
; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00]
@@ -2016,6 +2228,18 @@ define void @test_ldmxcsr(i32 %a0) {
; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ldmxcsr:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ldmxcsr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; BDVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ldmxcsr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
@@ -2126,6 +2350,18 @@ define <4 x float> @test_maxps(<4 x floa
; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00]
@@ -2235,6 +2471,18 @@ define <4 x float> @test_maxss(<4 x floa
; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00]
@@ -2344,6 +2592,18 @@ define <4 x float> @test_minps(<4 x floa
; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00]
@@ -2453,6 +2713,18 @@ define <4 x float> @test_minss(<4 x floa
; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00]
@@ -2575,6 +2847,20 @@ define void @test_movaps(<4 x float> *%a
; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movaps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movaps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movaps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00]
@@ -2682,6 +2968,16 @@ define <4 x float> @test_movhlps(<4 x fl
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movhlps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movhlps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movhlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
@@ -2813,6 +3109,22 @@ define <4 x float> @test_movhps(<4 x flo
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movhps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -2935,6 +3247,18 @@ define <4 x float> @test_movlhps(<4 x fl
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movlhps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movlhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movlhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -3069,6 +3393,22 @@ define <4 x float> @test_movlps(<4 x flo
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movlps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movlps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3178,6 +3518,16 @@ define i32 @test_movmskps(<4 x float> %a
; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movmskps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movmskps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movmskps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
@@ -3274,6 +3624,16 @@ define void @test_movntps(<4 x float> %a
; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00]
@@ -3389,6 +3749,20 @@ define void @test_movss_mem(float* %a0,
; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movss_mem:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movss_mem:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movss_mem:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -3494,6 +3868,16 @@ define <4 x float> @test_movss_reg(<4 x
; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movss_reg:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movss_reg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movss_reg:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
@@ -3609,6 +3993,20 @@ define void @test_movups(<4 x float> *%a
; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movups:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movups:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movups:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00]
@@ -3721,6 +4119,18 @@ define <4 x float> @test_mulps(<4 x floa
; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00]
@@ -3829,6 +4239,18 @@ define float @test_mulss(float %a0, floa
; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00]
@@ -3941,6 +4363,18 @@ define <4 x float> @test_orps(<4 x float
; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_orps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_orps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_orps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
@@ -4105,6 +4539,26 @@ define void @test_prefetch(i8* %a0) opts
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_prefetch:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: #APP
+; BDVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; BDVER2-SSE-NEXT: #NO_APP
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_prefetch:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_prefetch:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: #APP
@@ -4242,6 +4696,20 @@ define <4 x float> @test_rcpps(<4 x floa
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rcpps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rcpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rcpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00]
@@ -4384,6 +4852,22 @@ define <4 x float> @test_rcpss(float %a0
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rcpss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rcpss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rcpss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -4519,6 +5003,20 @@ define <4 x float> @test_rsqrtps(<4 x fl
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rsqrtps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rsqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rsqrtps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00]
@@ -4661,6 +5159,22 @@ define <4 x float> @test_rsqrtss(float %
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_rsqrtss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_rsqrtss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_rsqrtss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -4774,6 +5288,16 @@ define void @test_sfence() {
; SKX-NEXT: sfence # sched: [2:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sfence:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sfence # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sfence:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: sfence # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sfence:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sfence # sched: [1:1.00]
@@ -4890,6 +5414,20 @@ define <4 x float> @test_shufps(<4 x flo
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_shufps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_shufps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
+; BDVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_shufps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
@@ -5017,6 +5555,20 @@ define <4 x float> @test_sqrtps(<4 x flo
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:14.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00]
@@ -5159,6 +5711,22 @@ define <4 x float> @test_sqrtss(<4 x flo
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
+; BDVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:14.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00]
@@ -5277,6 +5845,18 @@ define i32 @test_stmxcsr() {
; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_stmxcsr:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_stmxcsr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; BDVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_stmxcsr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
@@ -5387,6 +5967,18 @@ define <4 x float> @test_subps(<4 x floa
; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
@@ -5495,6 +6087,18 @@ define float @test_subss(float %a0, floa
; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
@@ -5707,6 +6311,34 @@ define i32 @test_ucomiss(<4 x float> %a0
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ucomiss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ucomiss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ucomiss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
@@ -5862,6 +6494,20 @@ define <4 x float> @test_unpckhps(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpckhps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpckhps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpckhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -5988,6 +6634,20 @@ define <4 x float> @test_unpcklps(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpcklps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpcklps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpcklps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -6105,6 +6765,18 @@ define <4 x float> @test_xorps(<4 x floa
; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_xorps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_xorps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_xorps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
@@ -6249,6 +6921,22 @@ define <4 x float> @test_fnop() nounwind
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_fnop:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: #APP
+; BDVER2-SSE-NEXT: nop # sched: [1:0.25]
+; BDVER2-SSE-NEXT: #NO_APP
+; BDVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_fnop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: nop # sched: [1:0.25]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_fnop:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.50]
Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Sat Oct 27 13:36:11 2018
@@ -14,6 +14,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@@ -98,6 +100,18 @@ define <2 x double> @test_addpd(<2 x dou
; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
@@ -206,6 +220,18 @@ define double @test_addsd(double %a0, do
; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
@@ -327,6 +353,20 @@ define <2 x double> @test_andpd(<2 x dou
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
@@ -457,6 +497,20 @@ define <2 x double> @test_andnotpd(<2 x
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_andnotpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_andnotpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_andnotpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
@@ -569,6 +623,16 @@ define void @test_clflush(i8* %p){
; SKX-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_clflush:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_clflush:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: clflush (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_clflush:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
@@ -685,6 +749,20 @@ define <2 x double> @test_cmppd(<2 x dou
; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmppd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmppd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmppd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
@@ -800,6 +878,18 @@ define double @test_cmpsd(double %a0, do
; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cmpsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cmpsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cmpsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
@@ -1017,6 +1107,34 @@ define i32 @test_comisd(<2 x double> %a0
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_comisd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_comisd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_comisd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -1174,6 +1292,20 @@ define <2 x double> @test_cvtdq2pd(<4 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtdq2pd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtdq2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtdq2pd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
@@ -1303,6 +1435,20 @@ define <4 x float> @test_cvtdq2ps(<4 x i
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtdq2ps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtdq2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtdq2ps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
@@ -1431,6 +1577,20 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtpd2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtpd2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
@@ -1560,6 +1720,20 @@ define <4 x float> @test_cvtpd2ps(<2 x d
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtpd2ps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtpd2ps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtpd2ps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
@@ -1688,6 +1862,20 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtps2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtps2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
@@ -1816,6 +2004,20 @@ define <2 x double> @test_cvtps2pd(<4 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtps2pd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtps2pd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtps2pd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
@@ -1944,6 +2146,20 @@ define i32 @test_cvtsd2si(double %a0, do
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsd2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsd2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsd2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
@@ -2073,6 +2289,20 @@ define i64 @test_cvtsd2siq(double %a0, d
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsd2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsd2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsd2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
@@ -2216,6 +2446,22 @@ define float @test_cvtsd2ss(double %a0,
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsd2ss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsd2ss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
+; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsd2ss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00]
@@ -2346,6 +2592,20 @@ define double @test_cvtsi2sd(i32 %a0, i3
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2sd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2sd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2sd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [14:1.00]
@@ -2472,6 +2732,20 @@ define double @test_cvtsi2sdq(i64 %a0, i
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtsi2sdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtsi2sdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtsi2sdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [14:1.00]
@@ -2614,6 +2888,22 @@ define double @test_cvtss2sd(float %a0,
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvtss2sd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvtss2sd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
+; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvtss2sd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [7:2.00]
@@ -2746,6 +3036,20 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttpd2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttpd2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttpd2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
@@ -2875,6 +3179,20 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttps2dq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttps2dq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttps2dq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
@@ -3001,6 +3319,20 @@ define i32 @test_cvttsd2si(double %a0, d
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttsd2si:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttsd2si:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttsd2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00]
@@ -3127,6 +3459,20 @@ define i64 @test_cvttsd2siq(double %a0,
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_cvttsd2siq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_cvttsd2siq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; BDVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_cvttsd2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00]
@@ -3240,6 +3586,18 @@ define <2 x double> @test_divpd(<2 x dou
; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00]
+; BDVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
+; BDVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00]
@@ -3348,6 +3706,18 @@ define double @test_divsd(double %a0, do
; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_divsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00]
+; BDVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_divsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
+; BDVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_divsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00]
@@ -3449,6 +3819,16 @@ define void @test_lfence() {
; SKX-NEXT: lfence # sched: [2:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_lfence:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: lfence # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_lfence:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: lfence # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_lfence:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: lfence # sched: [1:1.00]
@@ -3545,6 +3925,16 @@ define void @test_mfence() {
; SKX-NEXT: mfence # sched: [3:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mfence:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mfence # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mfence:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: mfence # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mfence:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mfence # sched: [1:1.00]
@@ -3639,6 +4029,16 @@ define void @test_maskmovdqu(<16 x i8> %
; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maskmovdqu:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maskmovdqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maskmovdqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
@@ -3742,6 +4142,18 @@ define <2 x double> @test_maxpd(<2 x dou
; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00]
@@ -3851,6 +4263,18 @@ define <2 x double> @test_maxsd(<2 x dou
; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_maxsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_maxsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_maxsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00]
@@ -3960,6 +4384,18 @@ define <2 x double> @test_minpd(<2 x dou
; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00]
@@ -4069,6 +4505,18 @@ define <2 x double> @test_minsd(<2 x dou
; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_minsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_minsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_minsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00]
@@ -4191,6 +4639,20 @@ define void @test_movapd(<2 x double> *%
; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movapd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movapd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movapd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00]
@@ -4316,6 +4778,20 @@ define void @test_movdqa(<2 x i64> *%a0,
; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movdqa:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movdqa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movdqa:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00]
@@ -4441,6 +4917,20 @@ define void @test_movdqu(<2 x i64> *%a0,
; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movdqu:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movdqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movdqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00]
@@ -4605,6 +5095,26 @@ define i32 @test_movd(<4 x i32> %a0, i32
; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
+; BDVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -4786,6 +5296,26 @@ define i64 @test_movd_64(<2 x i64> %a0,
; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movd_64:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movd_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
+; BDVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
+; BDVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movd_64:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
@@ -4942,6 +5472,22 @@ define <2 x double> @test_movhpd(<2 x do
; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movhpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movhpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movhpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -5088,6 +5634,22 @@ define <2 x double> @test_movlpd(<2 x do
; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movlpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movlpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movlpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -5196,6 +5758,16 @@ define i32 @test_movmskpd(<2 x double> %
; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movmskpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movmskpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movmskpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
@@ -5301,6 +5873,18 @@ define void @test_movntdqa(<2 x i64> %a0
; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntdqa:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntdqa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntdqa:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
@@ -5408,6 +5992,18 @@ define void @test_movntpd(<2 x double> %
; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
@@ -5528,6 +6124,20 @@ define <2 x i64> @test_movq_mem(<2 x i64
; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movq_mem:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movq_mem:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movq_mem:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
@@ -5644,6 +6254,18 @@ define <2 x i64> @test_movq_reg(<2 x i64
; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movq_reg:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movq_reg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; BDVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movq_reg:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
@@ -5764,6 +6386,20 @@ define void @test_movsd_mem(double* %a0,
; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movsd_mem:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; BDVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movsd_mem:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; BDVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movsd_mem:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
@@ -5875,6 +6511,17 @@ define <2 x double> @test_movsd_reg(<2 x
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movsd_reg:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movsd_reg:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movsd_reg:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
@@ -5992,6 +6639,20 @@ define void @test_movupd(<2 x double> *%
; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movupd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movupd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movupd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00]
@@ -6104,6 +6765,18 @@ define <2 x double> @test_mulpd(<2 x dou
; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00]
@@ -6212,6 +6885,18 @@ define double @test_mulsd(double %a0, do
; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mulsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mulsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mulsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00]
@@ -6333,6 +7018,20 @@ define <2 x double> @test_orpd(<2 x doub
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_orpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_orpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_orpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
@@ -6454,6 +7153,18 @@ define <8 x i16> @test_packssdw(<4 x i32
; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packssdw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packssdw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packssdw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
@@ -6568,6 +7279,18 @@ define <16 x i8> @test_packsswb(<8 x i16
; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packsswb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packsswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packsswb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
@@ -6682,6 +7405,18 @@ define <16 x i8> @test_packuswb(<8 x i16
; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packuswb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packuswb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packuswb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
@@ -6796,6 +7531,18 @@ define <16 x i8> @test_paddb(<16 x i8> %
; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
@@ -6908,6 +7655,18 @@ define <4 x i32> @test_paddd(<4 x i32> %
; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
@@ -7016,6 +7775,18 @@ define <2 x i64> @test_paddq(<2 x i64> %
; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
@@ -7128,6 +7899,18 @@ define <16 x i8> @test_paddsb(<16 x i8>
; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
@@ -7241,6 +8024,18 @@ define <8 x i16> @test_paddsw(<8 x i16>
; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
@@ -7354,6 +8149,18 @@ define <16 x i8> @test_paddusb(<16 x i8>
; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddusb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddusb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
@@ -7467,6 +8274,18 @@ define <8 x i16> @test_paddusw(<8 x i16>
; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddusw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddusw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
@@ -7580,6 +8399,18 @@ define <8 x i16> @test_paddw(<8 x i16> %
; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_paddw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_paddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_paddw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
@@ -7701,6 +8532,20 @@ define <2 x i64> @test_pand(<2 x i64> %a
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pand:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pand:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pand:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
@@ -7843,6 +8688,22 @@ define <2 x i64> @test_pandn(<2 x i64> %
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pandn:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pandn:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pandn:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
@@ -7966,6 +8827,18 @@ define <16 x i8> @test_pavgb(<16 x i8> %
; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pavgb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pavgb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pavgb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
@@ -8088,6 +8961,18 @@ define <8 x i16> @test_pavgw(<8 x i16> %
; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pavgw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pavgw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pavgw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
@@ -8221,6 +9106,20 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
@@ -8350,6 +9249,20 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
@@ -8479,6 +9392,20 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
@@ -8614,6 +9541,21 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
@@ -8751,6 +9693,21 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
@@ -8888,6 +9845,21 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
@@ -9004,6 +9976,18 @@ define i16 @test_pextrw(<8 x i16> %a0) {
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
@@ -9114,6 +10098,18 @@ define <8 x i16> @test_pinsrw(<8 x i16>
; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [7:0.50]
@@ -9222,6 +10218,18 @@ define <4 x i32> @test_pmaddwd(<8 x i16>
; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaddwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaddwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaddwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9336,6 +10344,18 @@ define <8 x i16> @test_pmaxsw(<8 x i16>
; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
@@ -9449,6 +10469,18 @@ define <16 x i8> @test_pmaxub(<16 x i8>
; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxub:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxub:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
@@ -9562,6 +10594,18 @@ define <8 x i16> @test_pminsw(<8 x i16>
; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
@@ -9675,6 +10719,18 @@ define <16 x i8> @test_pminub(<16 x i8>
; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminub:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminub:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
@@ -9773,6 +10829,16 @@ define i32 @test_pmovmskb(<16 x i8> %a0)
; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovmskb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovmskb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovmskb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
@@ -9876,6 +10942,18 @@ define <8 x i16> @test_pmulhuw(<8 x i16>
; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulhuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulhuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulhuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
@@ -9985,6 +11063,18 @@ define <8 x i16> @test_pmulhw(<8 x i16>
; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulhw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulhw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulhw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00]
@@ -10094,6 +11184,18 @@ define <8 x i16> @test_pmullw(<8 x i16>
; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmullw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmullw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmullw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00]
@@ -10202,6 +11304,18 @@ define <2 x i64> @test_pmuludq(<4 x i32>
; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmuludq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmuludq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmuludq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00]
@@ -10325,6 +11439,20 @@ define <2 x i64> @test_por(<2 x i64> %a0
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_por:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_por:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_por:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
@@ -10438,6 +11566,18 @@ define <2 x i64> @test_psadbw(<16 x i8>
; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psadbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psadbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psadbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [2:0.50]
@@ -10564,6 +11704,20 @@ define <4 x i32> @test_pshufd(<4 x i32>
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshufd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshufd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
+; BDVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshufd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
@@ -10693,6 +11847,20 @@ define <8 x i16> @test_pshufhw(<8 x i16>
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshufhw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshufhw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BDVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshufhw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
@@ -10822,6 +11990,20 @@ define <8 x i16> @test_pshuflw(<8 x i16>
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshuflw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshuflw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BDVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshuflw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
@@ -10948,6 +12130,20 @@ define <4 x i32> @test_pslld(<4 x i32> %
; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pslld:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pslld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pslld:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50]
@@ -11056,6 +12252,16 @@ define <4 x i32> @test_pslldq(<4 x i32>
; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pslldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pslldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pslldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
@@ -11171,6 +12377,20 @@ define <2 x i64> @test_psllq(<2 x i64> %
; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psllq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psllq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psllq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50]
@@ -11299,6 +12519,20 @@ define <8 x i16> @test_psllw(<8 x i16> %
; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psllw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psllw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psllw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50]
@@ -11427,6 +12661,20 @@ define <4 x i32> @test_psrad(<4 x i32> %
; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrad:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrad:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrad:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50]
@@ -11555,6 +12803,20 @@ define <8 x i16> @test_psraw(<8 x i16> %
; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psraw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psraw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psraw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50]
@@ -11683,6 +12945,20 @@ define <4 x i32> @test_psrld(<4 x i32> %
; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrld:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrld:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50]
@@ -11791,6 +13067,16 @@ define <4 x i32> @test_psrldq(<4 x i32>
; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
@@ -11906,6 +13192,20 @@ define <2 x i64> @test_psrlq(<2 x i64> %
; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrlq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrlq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrlq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50]
@@ -12034,6 +13334,20 @@ define <8 x i16> @test_psrlw(<8 x i16> %
; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psrlw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psrlw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psrlw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50]
@@ -12153,6 +13467,18 @@ define <16 x i8> @test_psubb(<16 x i8> %
; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
@@ -12265,6 +13591,18 @@ define <4 x i32> @test_psubd(<4 x i32> %
; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
@@ -12373,6 +13711,18 @@ define <2 x i64> @test_psubq(<2 x i64> %
; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
@@ -12485,6 +13835,18 @@ define <16 x i8> @test_psubsb(<16 x i8>
; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
@@ -12598,6 +13960,18 @@ define <8 x i16> @test_psubsw(<8 x i16>
; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
@@ -12711,6 +14085,18 @@ define <16 x i8> @test_psubusb(<16 x i8>
; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubusb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubusb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubusb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
@@ -12824,6 +14210,18 @@ define <8 x i16> @test_psubusw(<8 x i16>
; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubusw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubusw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubusw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
@@ -12937,6 +14335,18 @@ define <8 x i16> @test_psubw(<8 x i16> %
; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psubw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psubw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
@@ -13049,6 +14459,18 @@ define <16 x i8> @test_punpckhbw(<16 x i
; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
@@ -13172,6 +14594,20 @@ define <4 x i32> @test_punpckhdq(<4 x i3
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -13298,6 +14734,20 @@ define <2 x i64> @test_punpckhqdq(<2 x i
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhqdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhqdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhqdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -13415,6 +14865,18 @@ define <8 x i16> @test_punpckhwd(<8 x i1
; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckhwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckhwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckhwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
@@ -13527,6 +14989,18 @@ define <16 x i8> @test_punpcklbw(<16 x i
; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpcklbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpcklbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpcklbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
@@ -13650,6 +15124,20 @@ define <4 x i32> @test_punpckldq(<4 x i3
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpckldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpckldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpckldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -13776,6 +15264,20 @@ define <2 x i64> @test_punpcklqdq(<2 x i
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpcklqdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpcklqdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpcklqdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -13893,6 +15395,18 @@ define <8 x i16> @test_punpcklwd(<8 x i1
; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_punpcklwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_punpcklwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_punpcklwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -14014,6 +15528,20 @@ define <2 x i64> @test_pxor(<2 x i64> %a
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pxor:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pxor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pxor:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
@@ -14140,6 +15668,20 @@ define <2 x double> @test_shufpd(<2 x do
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_shufpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_shufpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; BDVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_shufpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
@@ -14267,6 +15809,20 @@ define <2 x double> @test_sqrtpd(<2 x do
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
+; BDVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00]
+; BDVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:21.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [27:27.00]
@@ -14409,6 +15965,22 @@ define <2 x double> @test_sqrtsd(<2 x do
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_sqrtsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
+; BDVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_sqrtsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
+; BDVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
+; BDVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_sqrtsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00]
@@ -14527,6 +16099,18 @@ define <2 x double> @test_subpd(<2 x dou
; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
@@ -14635,6 +16219,18 @@ define double @test_subsd(double %a0, do
; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_subsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_subsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_subsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
@@ -14847,6 +16443,34 @@ define i32 @test_ucomisd(<2 x double> %a
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ucomisd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ucomisd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setnp %al # sched: [1:0.50]
+; BDVER2-NEXT: sete %dl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
+; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ucomisd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -15002,6 +16626,20 @@ define <2 x double> @test_unpckhpd(<2 x
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpckhpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpckhpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpckhpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -15136,6 +16774,21 @@ define <2 x double> @test_unpcklpd(<2 x
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_unpcklpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_unpcklpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_unpcklpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
@@ -15264,6 +16917,20 @@ define <2 x double> @test_xorpd(<2 x dou
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_xorpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_xorpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_xorpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Sat Oct 27 13:36:11 2018
@@ -14,7 +14,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+sse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
@@ -98,6 +100,18 @@ define <2 x double> @test_addsubpd(<2 x
; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addsubpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addsubpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
@@ -207,6 +221,18 @@ define <4 x float> @test_addsubps(<4 x f
; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_addsubps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_addsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_addsubps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
@@ -316,6 +342,18 @@ define <2 x double> @test_haddpd(<2 x do
; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_haddpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_haddpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_haddpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
@@ -425,6 +463,18 @@ define <4 x float> @test_haddps(<4 x flo
; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_haddps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_haddps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_haddps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
@@ -534,6 +584,18 @@ define <2 x double> @test_hsubpd(<2 x do
; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_hsubpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_hsubpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_hsubpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
@@ -643,6 +705,18 @@ define <4 x float> @test_hsubps(<4 x flo
; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_hsubps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
+; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_hsubps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_hsubps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
@@ -741,6 +815,16 @@ define <16 x i8> @test_lddqu(i8* %a0) {
; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_lddqu:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_lddqu:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_lddqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
@@ -857,6 +941,20 @@ define void @test_monitor(i8* %a0, i32 %
; SKX-NEXT: monitor # sched: [100:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_monitor:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; BDVER2-SSE-NEXT: monitor # sched: [100:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_monitor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
+; BDVER2-NEXT: monitor # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_monitor:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
@@ -982,6 +1080,20 @@ define <2 x double> @test_movddup(<2 x d
; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movddup:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
+; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movddup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
+; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
+; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movddup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
@@ -1109,6 +1221,20 @@ define <4 x float> @test_movshdup(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movshdup:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movshdup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
+; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movshdup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
@@ -1236,6 +1362,20 @@ define <4 x float> @test_movsldup(<4 x f
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movsldup:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movsldup:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
+; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movsldup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
@@ -1362,6 +1502,20 @@ define void @test_mwait(i32 %a0, i32 %a1
; SKX-NEXT: mwait # sched: [20:2.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mwait:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: mwait # sched: [100:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mwait:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: mwait # sched: [100:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mwait:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Sat Oct 27 13:36:11 2018
@@ -13,6 +13,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@@ -103,6 +105,20 @@ define <2 x double> @test_blendpd(<2 x d
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
+; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
+; BDVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -222,6 +238,20 @@ define <4 x float> @test_blendps(<4 x fl
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; BDVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -350,6 +380,21 @@ define <2 x double> @test_blendvpd(<2 x
; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendvpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendvpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendvpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
@@ -480,6 +525,21 @@ define <4 x float> @test_blendvps(<4 x f
; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_blendvps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_blendvps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_blendvps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
@@ -589,6 +649,18 @@ define <2 x double> @test_dppd(<2 x doub
; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_dppd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_dppd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_dppd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
@@ -692,6 +764,18 @@ define <4 x float> @test_dpps(<4 x float
; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_dpps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
+; BDVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_dpps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
+; BDVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_dpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
@@ -795,6 +879,18 @@ define i32 @test_extractps(<4 x float> %
; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_extractps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_extractps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_extractps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
@@ -899,6 +995,18 @@ define <4 x float> @test_insertps(<4 x f
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_insertps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_insertps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_insertps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
@@ -990,6 +1098,16 @@ define <2 x i64> @test_movntdqa(i8* %a0)
; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_movntdqa:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_movntdqa:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_movntdqa:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00]
@@ -1087,6 +1205,18 @@ define <8 x i16> @test_mpsadbw(<16 x i8>
; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_mpsadbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
+; BDVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_mpsadbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_mpsadbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
@@ -1191,6 +1321,18 @@ define <8 x i16> @test_packusdw(<4 x i32
; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_packusdw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_packusdw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_packusdw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
@@ -1316,6 +1458,21 @@ define <16 x i8> @test_pblendvb(<16 x i8
; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pblendvb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pblendvb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pblendvb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
@@ -1437,6 +1594,20 @@ define <8 x i16> @test_pblendw(<8 x i16>
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pblendw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pblendw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; BDVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pblendw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@@ -1544,6 +1715,18 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpeqq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpeqq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpeqq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
@@ -1648,6 +1831,18 @@ define i32 @test_pextrb(<16 x i8> %a0, i
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
@@ -1763,6 +1958,20 @@ define i32 @test_pextrd(<4 x i32> %a0, i
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
@@ -1870,6 +2079,18 @@ define i64 @test_pextrq(<2 x i64> %a0, <
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
@@ -1972,6 +2193,18 @@ define i32 @test_pextrw(<8 x i16> %a0, i
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pextrw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pextrw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
+; BDVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pextrw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
@@ -2075,6 +2308,18 @@ define <8 x i16> @test_phminposuw(<8 x i
; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phminposuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phminposuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phminposuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
@@ -2178,6 +2423,18 @@ define <16 x i8> @test_pinsrb(<16 x i8>
; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [7:0.50]
@@ -2280,6 +2537,18 @@ define <4 x i32> @test_pinsrd(<4 x i32>
; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [7:0.50]
@@ -2394,6 +2663,20 @@ define <2 x i64> @test_pinsrq(<2 x i64>
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pinsrq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pinsrq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pinsrq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
@@ -2501,6 +2784,18 @@ define <16 x i8> @test_pmaxsb(<16 x i8>
; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
@@ -2604,6 +2899,18 @@ define <4 x i32> @test_pmaxsd(<4 x i32>
; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
@@ -2707,6 +3014,18 @@ define <4 x i32> @test_pmaxud(<4 x i32>
; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxud:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxud:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxud:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
@@ -2810,6 +3129,18 @@ define <8 x i16> @test_pmaxuw(<8 x i16>
; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaxuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaxuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaxuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
@@ -2913,6 +3244,18 @@ define <16 x i8> @test_pminsb(<16 x i8>
; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
@@ -3016,6 +3359,18 @@ define <4 x i32> @test_pminsd(<4 x i32>
; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
@@ -3119,6 +3474,18 @@ define <4 x i32> @test_pminud(<4 x i32>
; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminud:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminud:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminud:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
@@ -3222,6 +3589,18 @@ define <8 x i16> @test_pminuw(<8 x i16>
; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pminuw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pminuw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pminuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
@@ -3338,6 +3717,20 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
@@ -3459,6 +3852,20 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxbd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxbd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxbd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
@@ -3580,6 +3987,20 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxbq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxbq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxbq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
@@ -3701,6 +4122,20 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
@@ -3822,6 +4257,20 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
@@ -3943,6 +4392,20 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovsxwq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovsxwq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovsxwq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
@@ -4064,6 +4527,20 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxbw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxbw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
@@ -4185,6 +4662,20 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxbd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxbd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxbd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
@@ -4306,6 +4797,20 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxbq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxbq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxbq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
@@ -4427,6 +4932,20 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
@@ -4548,6 +5067,20 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxwd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxwd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
@@ -4669,6 +5202,20 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmovzxwq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmovzxwq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmovzxwq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
@@ -4789,6 +5336,20 @@ define <2 x i64> @test_pmuldq(<4 x i32>
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmuldq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmuldq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmuldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
@@ -4897,6 +5458,18 @@ define <4 x i32> @test_pmulld(<4 x i32>
; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulld:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulld:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:2.00]
@@ -5047,6 +5620,26 @@ define i32 @test_ptest(<2 x i64> %a0, <2
; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_ptest:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-SSE-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-SSE-NEXT: setb %cl # sched: [1:0.50]
+; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_ptest:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
+; BDVER2-NEXT: setb %al # sched: [1:0.50]
+; BDVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
+; BDVER2-NEXT: setb %cl # sched: [1:0.50]
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
+; BDVER2-NEXT: movzbl %cl, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_ptest:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -5181,6 +5774,20 @@ define <2 x double> @test_roundpd(<2 x d
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundpd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundpd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
@@ -5303,6 +5910,20 @@ define <4 x float> @test_roundps(<4 x fl
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundps:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundps:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
@@ -5430,6 +6051,21 @@ define <2 x double> @test_roundsd(<2 x d
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
@@ -5559,6 +6195,21 @@ define <4 x float> @test_roundss(<4 x fl
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_roundss:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; BDVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_roundss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
+; BDVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_roundss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Sat Oct 27 13:36:11 2018
@@ -13,6 +13,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.2,+pclmul -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@@ -103,6 +105,20 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_32_8:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_32_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_32_8:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
@@ -222,6 +238,20 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_32_16:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_32_16:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: crc32w %si, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_32_16:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
@@ -341,6 +371,20 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_32_32:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_32_32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
+; BDVER2-NEXT: crc32l %esi, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_32_32:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
@@ -460,6 +504,20 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_64_8:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_64_8:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00]
+; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_64_8:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
@@ -579,6 +637,20 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: crc32_64_64:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
+; BDVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: crc32_64_64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
+; BDVER2-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
+; BDVER2-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: crc32_64_64:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
@@ -770,6 +842,32 @@ define i32 @test_pcmpestri(<16 x i8> %a0
; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpestri:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
+; BDVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
+; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpestri:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
+; BDVER2-NEXT: movl %ecx, %esi # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
+; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpestri:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
@@ -950,6 +1048,26 @@ define <16 x i8> @test_pcmpestrm(<16 x i
; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpestrm:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
+; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpestrm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
+; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
+; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpestrm:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
@@ -1105,6 +1223,24 @@ define i32 @test_pcmpistri(<16 x i8> %a0
; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpistri:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33]
+; BDVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpistri:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-NEXT: movl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
+; BDVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpistri:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
@@ -1221,6 +1357,18 @@ define <16 x i8> @test_pcmpistrm(<16 x i
; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpistrm:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpistrm:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BDVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpistrm:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
@@ -1324,6 +1472,18 @@ define <2 x i64> @test_pcmpgtq(<2 x i64>
; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pcmpgtq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pcmpgtq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpcomgtq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pcmpgtq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
@@ -1428,6 +1588,18 @@ define <2 x i64> @test_pclmulqdq(<2 x i6
; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pclmulqdq:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00]
+; BDVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pclmulqdq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00]
+; BDVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pclmulqdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00]
Modified: llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll Sat Oct 27 13:36:11 2018
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
@@ -9,6 +10,11 @@ define <2 x i64> @test_extrq(<2 x i64> %
; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_extrq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_extrq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
@@ -29,6 +35,11 @@ define <2 x i64> @test_extrqi(<2 x i64>
; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_extrqi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_extrqi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
@@ -49,6 +60,11 @@ define <2 x i64> @test_insertq(<2 x i64>
; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_insertq:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_insertq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: insertq %xmm1, %xmm0 # sched: [2:2.00]
@@ -69,6 +85,11 @@ define <2 x i64> @test_insertqi(<2 x i64
; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_insertqi:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_insertqi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [2:2.00]
@@ -89,6 +110,11 @@ define void @test_movntsd(i8* %p, <2 x d
; GENERIC-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_movntsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00]
@@ -109,6 +135,11 @@ define void @test_movntss(i8* %p, <4 x f
; GENERIC-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
+; BDVER2-LABEL: test_movntss:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-LABEL: test_movntss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00]
Modified: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll Sat Oct 27 13:36:11 2018
@@ -14,6 +14,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+ssse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@@ -113,6 +115,20 @@ define <16 x i8> @test_pabsb(<16 x i8> %
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pabsb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pabsb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pabsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
@@ -242,6 +258,20 @@ define <4 x i32> @test_pabsd(<4 x i32> %
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pabsd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pabsd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pabsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
@@ -371,6 +401,20 @@ define <8 x i16> @test_pabsw(<8 x i16> %
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pabsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pabsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
+; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pabsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
@@ -495,6 +539,19 @@ define <8 x i16> @test_palignr(<8 x i16>
; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_palignr:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
+; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
+; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_palignr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
+; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_palignr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
@@ -605,6 +662,18 @@ define <4 x i32> @test_phaddd(<4 x i32>
; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phaddd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phaddd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phaddd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50]
@@ -714,6 +783,18 @@ define <8 x i16> @test_phaddsw(<8 x i16>
; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phaddsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phaddsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phaddsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50]
@@ -823,6 +904,18 @@ define <8 x i16> @test_phaddw(<8 x i16>
; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phaddw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phaddw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phaddw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50]
@@ -932,6 +1025,18 @@ define <4 x i32> @test_phsubd(<4 x i32>
; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phsubd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phsubd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phsubd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50]
@@ -1041,6 +1146,18 @@ define <8 x i16> @test_phsubsw(<8 x i16>
; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phsubsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phsubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phsubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50]
@@ -1150,6 +1267,18 @@ define <8 x i16> @test_phsubw(<8 x i16>
; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_phsubw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50]
+; BDVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_phsubw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_phsubw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50]
@@ -1259,6 +1388,18 @@ define <8 x i16> @test_pmaddubsw(<16 x i
; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmaddubsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmaddubsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmaddubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [2:1.00]
@@ -1369,6 +1510,18 @@ define <8 x i16> @test_pmulhrsw(<8 x i16
; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pmulhrsw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
+; BDVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pmulhrsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pmulhrsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [2:1.00]
@@ -1478,6 +1631,18 @@ define <16 x i8> @test_pshufb(<16 x i8>
; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_pshufb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_pshufb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_pshufb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [2:2.00]
@@ -1591,6 +1756,18 @@ define <16 x i8> @test_psignb(<16 x i8>
; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psignb:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psignb:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psignb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
@@ -1704,6 +1881,18 @@ define <4 x i32> @test_psignd(<4 x i32>
; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psignd:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psignd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psignd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
@@ -1817,6 +2006,18 @@ define <8 x i16> @test_psignw(<8 x i16>
; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BDVER2-SSE-LABEL: test_psignw:
+; BDVER2-SSE: # %bb.0:
+; BDVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
+; BDVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
+; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
+;
+; BDVER2-LABEL: test_psignw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
; BTVER2-SSE-LABEL: test_psignw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
Modified: llvm/trunk/test/CodeGen/X86/tbm-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm-schedule.ll Sat Oct 27 13:36:11 2018
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
@@ -14,12 +14,28 @@ define i32 @test_x86_tbm_bextri_u32(i32
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_bextri_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_bextri_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER2-NEXT: # sched: [2:1.00]
+; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_bextri_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_bextri_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = lshr i32 %a0, 4
%m0 = lshr i32 %a1, 4
@@ -39,12 +55,28 @@ define i64 @test_x86_tbm_bextri_u64(i64
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_bextri_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_bextri_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER2-NEXT: # sched: [2:1.00]
+; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER2-NEXT: # sched: [7:1.00]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_bextri_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_bextri_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = lshr i64 %a0, 4
%m0 = lshr i64 %a1, 4
@@ -62,12 +94,26 @@ define i32 @test_x86_tbm_blcfill_u32(i32
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcfill_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcfilll %edi, %ecx
-; BDVER-NEXT: blcfilll (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcfill_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcfill_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcfilll %edi, %ecx
+; BDVER3-NEXT: blcfilll (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcfill_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcfilll %edi, %ecx
+; BDVER4-NEXT: blcfilll (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
@@ -85,12 +131,26 @@ define i64 @test_x86_tbm_blcfill_u64(i64
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcfill_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcfillq %rdi, %rcx
-; BDVER-NEXT: blcfillq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcfill_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcfill_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcfillq %rdi, %rcx
+; BDVER3-NEXT: blcfillq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcfill_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcfillq %rdi, %rcx
+; BDVER4-NEXT: blcfillq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
@@ -108,12 +168,26 @@ define i32 @test_x86_tbm_blci_u32(i32 %a
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blci_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcil %edi, %ecx
-; BDVER-NEXT: blcil (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blci_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcil %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blci_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcil %edi, %ecx
+; BDVER3-NEXT: blcil (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blci_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcil %edi, %ecx
+; BDVER4-NEXT: blcil (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 1, %a0
%m0 = add i32 1, %a1
@@ -133,12 +207,26 @@ define i64 @test_x86_tbm_blci_u64(i64 %a
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blci_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blciq %rdi, %rcx
-; BDVER-NEXT: blciq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blci_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blciq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blci_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blciq %rdi, %rcx
+; BDVER3-NEXT: blciq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blci_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blciq %rdi, %rcx
+; BDVER4-NEXT: blciq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 1, %a0
%m0 = add i64 1, %a1
@@ -158,12 +246,26 @@ define i32 @test_x86_tbm_blcic_u32(i32 %
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcic_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcicl %edi, %ecx
-; BDVER-NEXT: blcicl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcic_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcicl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcic_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcicl %edi, %ecx
+; BDVER3-NEXT: blcicl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcic_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcicl %edi, %ecx
+; BDVER4-NEXT: blcicl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@@ -183,12 +285,26 @@ define i64 @test_x86_tbm_blcic_u64(i64 %
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcic_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcicq %rdi, %rcx
-; BDVER-NEXT: blcicq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcic_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcic_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcicq %rdi, %rcx
+; BDVER3-NEXT: blcicq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcic_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcicq %rdi, %rcx
+; BDVER4-NEXT: blcicq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
@@ -208,12 +324,26 @@ define i32 @test_x86_tbm_blcmsk_u32(i32
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcmsk_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcmskl %edi, %ecx
-; BDVER-NEXT: blcmskl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcmsk_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcmsk_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcmskl %edi, %ecx
+; BDVER3-NEXT: blcmskl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcmsk_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcmskl %edi, %ecx
+; BDVER4-NEXT: blcmskl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
@@ -231,12 +361,26 @@ define i64 @test_x86_tbm_blcmsk_u64(i64
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcmsk_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcmskq %rdi, %rcx
-; BDVER-NEXT: blcmskq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcmsk_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcmsk_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcmskq %rdi, %rcx
+; BDVER3-NEXT: blcmskq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcmsk_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcmskq %rdi, %rcx
+; BDVER4-NEXT: blcmskq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
@@ -254,12 +398,26 @@ define i32 @test_x86_tbm_blcs_u32(i32 %a
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcs_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcsl %edi, %ecx
-; BDVER-NEXT: blcsl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcs_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcsl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcs_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcsl %edi, %ecx
+; BDVER3-NEXT: blcsl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcs_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcsl %edi, %ecx
+; BDVER4-NEXT: blcsl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
@@ -277,12 +435,26 @@ define i64 @test_x86_tbm_blcs_u64(i64 %a
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blcs_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blcsq %rdi, %rcx
-; BDVER-NEXT: blcsq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blcs_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blcsq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blcs_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blcsq %rdi, %rcx
+; BDVER3-NEXT: blcsq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blcs_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blcsq %rdi, %rcx
+; BDVER4-NEXT: blcsq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
@@ -300,12 +472,26 @@ define i32 @test_x86_tbm_blsfill_u32(i32
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsfill_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsfilll %edi, %ecx
-; BDVER-NEXT: blsfilll (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsfill_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsfill_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsfilll %edi, %ecx
+; BDVER3-NEXT: blsfilll (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsfill_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsfilll %edi, %ecx
+; BDVER4-NEXT: blsfilll (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, -1
%m0 = add i32 %a1, -1
@@ -323,12 +509,26 @@ define i64 @test_x86_tbm_blsfill_u64(i64
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsfill_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsfillq %rdi, %rcx
-; BDVER-NEXT: blsfillq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsfill_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsfill_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsfillq %rdi, %rcx
+; BDVER3-NEXT: blsfillq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsfill_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsfillq %rdi, %rcx
+; BDVER4-NEXT: blsfillq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, -1
%m0 = add i64 %a1, -1
@@ -346,12 +546,26 @@ define i32 @test_x86_tbm_blsic_u32(i32 %
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsic_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsicl %edi, %ecx
-; BDVER-NEXT: blsicl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsic_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsicl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsic_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsicl %edi, %ecx
+; BDVER3-NEXT: blsicl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsic_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsicl %edi, %ecx
+; BDVER4-NEXT: blsicl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@@ -371,12 +585,26 @@ define i64 @test_x86_tbm_blsic_u64(i64 %
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_blsic_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: blsicq %rdi, %rcx
-; BDVER-NEXT: blsicq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_blsic_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_blsic_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: blsicq %rdi, %rcx
+; BDVER3-NEXT: blsicq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_blsic_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: blsicq %rdi, %rcx
+; BDVER4-NEXT: blsicq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
@@ -396,12 +624,26 @@ define i32 @test_x86_tbm_t1mskc_u32(i32
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_t1mskc_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: t1mskcl %edi, %ecx
-; BDVER-NEXT: t1mskcl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_t1mskc_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_t1mskc_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: t1mskcl %edi, %ecx
+; BDVER3-NEXT: t1mskcl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_t1mskc_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: t1mskcl %edi, %ecx
+; BDVER4-NEXT: t1mskcl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@@ -421,12 +663,26 @@ define i64 @test_x86_tbm_t1mskc_u64(i64
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_t1mskc_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: t1mskcq %rdi, %rcx
-; BDVER-NEXT: t1mskcq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_t1mskc_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_t1mskc_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: t1mskcq %rdi, %rcx
+; BDVER3-NEXT: t1mskcq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_t1mskc_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: t1mskcq %rdi, %rcx
+; BDVER4-NEXT: t1mskcq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
@@ -446,12 +702,26 @@ define i32 @test_x86_tbm_tzmsk_u32(i32 %
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_tzmsk_u32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: tzmskl %edi, %ecx
-; BDVER-NEXT: tzmskl (%rsi), %eax
-; BDVER-NEXT: addl %ecx, %eax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_tzmsk_u32:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [1:0.33]
+; BDVER2-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
+; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_tzmsk_u32:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: tzmskl %edi, %ecx
+; BDVER3-NEXT: tzmskl (%rsi), %eax
+; BDVER3-NEXT: addl %ecx, %eax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_tzmsk_u32:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: tzmskl %edi, %ecx
+; BDVER4-NEXT: tzmskl (%rsi), %eax
+; BDVER4-NEXT: addl %ecx, %eax
+; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@@ -471,12 +741,26 @@ define i64 @test_x86_tbm_tzmsk_u64(i64 %
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_x86_tbm_tzmsk_u64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: tzmskq %rdi, %rcx
-; BDVER-NEXT: tzmskq (%rsi), %rax
-; BDVER-NEXT: addq %rcx, %rax
-; BDVER-NEXT: retq
+; BDVER2-LABEL: test_x86_tbm_tzmsk_u64:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33]
+; BDVER2-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
+; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; BDVER2-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_x86_tbm_tzmsk_u64:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: tzmskq %rdi, %rcx
+; BDVER3-NEXT: tzmskq (%rsi), %rax
+; BDVER3-NEXT: addq %rcx, %rax
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_x86_tbm_tzmsk_u64:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: tzmskq %rdi, %rcx
+; BDVER4-NEXT: tzmskq (%rsi), %rax
+; BDVER4-NEXT: addq %rcx, %rax
+; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
Modified: llvm/trunk/test/CodeGen/X86/x87-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll Sat Oct 27 13:36:11 2018
@@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@@ -68,6 +69,13 @@ define void @test_f2xm1() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_f2xm1:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: f2xm1 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_f2xm1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -142,6 +150,13 @@ define void @test_fabs() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fabs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fabs # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fabs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -256,6 +271,18 @@ define void @test_fadd(float *%a0, doubl
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fadd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fadd %st(0), %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fadd %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fadds (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: faddl (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fadd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -380,6 +407,18 @@ define void @test_faddp_fiadd(i16 *%a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_faddp_fiadd:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: faddp %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: faddp %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fiadds (%ecx) # sched: [13:2.00]
+; BDVER2-NEXT: fiaddl (%eax) # sched: [13:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_faddp_fiadd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -480,6 +519,15 @@ define void @test_fbld_fbstp(i8* %a0) op
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fbld_fbstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fbld (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: fbstp (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fbld_fbstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -558,6 +606,13 @@ define void @test_fchs() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fchs:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fchs # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fchs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -640,6 +695,14 @@ define void @test_fclex() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fclex:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnclex # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fclex:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -716,6 +779,13 @@ define void @test_fnclex() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnclex:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnclex # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnclex:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -846,6 +916,20 @@ define void @test_fcmov() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcmov:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcmovb %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmove %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovne %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: fcmovu %st(1), %st(0) # sched: [3:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcmov:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -974,6 +1058,18 @@ define void @test_fcom(float *%a0, doubl
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcom:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcom %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fcom %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fcoms (%ecx) # sched: [8:1.00]
+; BDVER2-NEXT: fcoml (%eax) # sched: [8:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcom:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1106,6 +1202,19 @@ define void @test_fcomp_fcompp(float *%a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcomp_fcompp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcomp %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fcomp %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fcomps (%ecx) # sched: [8:1.00]
+; BDVER2-NEXT: fcompl (%eax) # sched: [8:1.00]
+; BDVER2-NEXT: fcompp # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcomp_fcompp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1200,6 +1309,14 @@ define void @test_fcomi_fcomip() optsize
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcomi_fcomip:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcomi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: fcompi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcomi_fcomip:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1276,6 +1393,13 @@ define void @test_fcos() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fcos:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fcos # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fcos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1350,6 +1474,13 @@ define void @test_fdecstp() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdecstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdecstp # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdecstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -1464,6 +1595,18 @@ define void @test_fdiv(float *%a0, doubl
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdiv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdiv %st(0), %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdiv %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fdivs (%ecx) # sched: [31:1.00]
+; BDVER2-NEXT: fdivl (%eax) # sched: [31:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1588,6 +1731,18 @@ define void @test_fdivp_fidiv(i16 *%a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdivp_fidiv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdivp %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdivp %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fidivs (%ecx) # sched: [34:1.00]
+; BDVER2-NEXT: fidivl (%eax) # sched: [34:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdivp_fidiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1712,6 +1867,18 @@ define void @test_fdivr(float *%a0, doub
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdivr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdivr %st(0), %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdivr %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fdivrs (%ecx) # sched: [31:1.00]
+; BDVER2-NEXT: fdivrl (%eax) # sched: [31:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdivr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1836,6 +2003,18 @@ define void @test_fdivrp_fidivr(i16 *%a0
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fdivrp_fidivr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fdivrp %st(1) # sched: [14:14.00]
+; BDVER2-NEXT: fdivrp %st(2) # sched: [14:14.00]
+; BDVER2-NEXT: fidivrs (%ecx) # sched: [34:1.00]
+; BDVER2-NEXT: fidivrl (%eax) # sched: [34:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fdivrp_fidivr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -1920,6 +2099,13 @@ define void @test_ffree() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ffree:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ffree %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ffree:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2034,6 +2220,18 @@ define void @test_ficom(i16 *%a0, i32 *%
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ficom:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ficoms (%ecx) # sched: [11:2.00]
+; BDVER2-NEXT: ficoml (%eax) # sched: [11:2.00]
+; BDVER2-NEXT: ficomps (%ecx) # sched: [11:2.00]
+; BDVER2-NEXT: ficompl (%eax) # sched: [11:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ficom:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -2158,6 +2356,18 @@ define void @test_fild(i16 *%a0, i32 *%a
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fild:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: filds (%edx) # sched: [10:1.00]
+; BDVER2-NEXT: fildl (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: fildll (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fild:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -2242,6 +2452,13 @@ define void @test_fincstp() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fincstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fincstp # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fincstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2324,6 +2541,14 @@ define void @test_finit() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_finit:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fninit # sched: [5:1.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_finit:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2400,6 +2625,13 @@ define void @test_fninit() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fninit:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fninit # sched: [5:1.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fninit:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -2554,6 +2786,23 @@ define void @test_fist_fistp_fisttp(i16*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fist_fistp_fisttp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fists (%edx) # sched: [9:1.00]
+; BDVER2-NEXT: fistl (%ecx) # sched: [9:1.00]
+; BDVER2-NEXT: fistps (%edx) # sched: [9:1.00]
+; BDVER2-NEXT: fistpl (%ecx) # sched: [9:1.00]
+; BDVER2-NEXT: fistpll (%eax) # sched: [9:1.00]
+; BDVER2-NEXT: fisttps (%edx) # sched: [5:1.00]
+; BDVER2-NEXT: fisttpl (%ecx) # sched: [5:1.00]
+; BDVER2-NEXT: fisttpll (%eax) # sched: [5:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fist_fistp_fisttp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -2696,6 +2945,19 @@ define void @test_fld(i16* %a0, i32* %a1
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fld:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fld %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: flds (%edx) # sched: [9:1.00]
+; BDVER2-NEXT: fldl (%ecx) # sched: [9:1.00]
+; BDVER2-NEXT: fldt (%eax) # sched: [9:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -2798,6 +3060,15 @@ define void @test_fldcw_fldenv(i8* %a0)
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fldcw_fldenv:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fldcw (%eax) # sched: [8:2.00]
+; BDVER2-NEXT: fldenv (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fldcw_fldenv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -2924,6 +3195,19 @@ define void @test_fld1_fldl2e_fldl2t_fld
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fld1 # sched: [1:1.00]
+; BDVER2-NEXT: fldl2e # sched: [1:1.00]
+; BDVER2-NEXT: fldl2t # sched: [1:1.00]
+; BDVER2-NEXT: fldlg2 # sched: [1:1.00]
+; BDVER2-NEXT: fldln2 # sched: [1:1.00]
+; BDVER2-NEXT: fldpi # sched: [1:1.00]
+; BDVER2-NEXT: fldz # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3050,6 +3334,18 @@ define void @test_fmul(float *%a0, doubl
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fmul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fmul %st(0), %st(1) # sched: [5:1.00]
+; BDVER2-NEXT: fmul %st(2) # sched: [5:1.00]
+; BDVER2-NEXT: fmuls (%ecx) # sched: [12:1.00]
+; BDVER2-NEXT: fmull (%eax) # sched: [12:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fmul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -3174,6 +3470,18 @@ define void @test_fmulp_fimul(i16 *%a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fmulp_fimul:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fmulp %st(1) # sched: [5:1.00]
+; BDVER2-NEXT: fmulp %st(2) # sched: [5:1.00]
+; BDVER2-NEXT: fimuls (%ecx) # sched: [15:1.00]
+; BDVER2-NEXT: fimull (%eax) # sched: [15:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fmulp_fimul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -3258,6 +3566,13 @@ define void @test_fnop() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnop:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnop # sched: [1:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3332,6 +3647,13 @@ define void @test_fpatan() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fpatan:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fpatan # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fpatan:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3414,6 +3736,14 @@ define void @test_fprem_fprem1() optsize
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fprem_fprem1:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fprem # sched: [100:0.33]
+; BDVER2-NEXT: fprem1 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fprem_fprem1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3490,6 +3820,13 @@ define void @test_fptan() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fptan:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fptan # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fptan:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3564,6 +3901,13 @@ define void @test_frndint() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_frndint:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: frndint # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_frndint:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3646,6 +3990,14 @@ define void @test_frstor(i8* %a0) optsiz
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_frstor:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: frstor (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_frstor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -3738,6 +4090,15 @@ define void @test_fsave(i8* %a0) optsize
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -3824,6 +4185,14 @@ define void @test_fnsave(i8* %a0) optsiz
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnsave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -3900,6 +4269,13 @@ define void @test_fscale() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fscale:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fscale # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fscale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -3974,6 +4350,13 @@ define void @test_fsin() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsin:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsin # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsin:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4048,6 +4431,13 @@ define void @test_fsincos() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsincos:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsincos # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsincos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4122,6 +4512,13 @@ define void @test_fsqrt() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsqrt:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsqrt # sched: [24:24.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsqrt:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -4268,6 +4665,22 @@ define void @test_fst_fstp(i16* %a0, i32
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fst_fstp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fst %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: fsts (%edx) # sched: [6:1.00]
+; BDVER2-NEXT: fstl (%ecx) # sched: [6:1.00]
+; BDVER2-NEXT: fstp %st(0) # sched: [1:1.00]
+; BDVER2-NEXT: fstpl (%edx) # sched: [6:1.00]
+; BDVER2-NEXT: fstpl (%ecx) # sched: [6:1.00]
+; BDVER2-NEXT: fstpt (%eax) # sched: [6:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fst_fstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -4408,6 +4821,19 @@ define void @test_fstcw_fstenv_fstsw(i8*
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fstcw_fstenv_fstsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fstcw_fstenv_fstsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -4518,6 +4944,16 @@ define void @test_fnstcw_fnstenv_fnstsw(
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -4638,6 +5074,18 @@ define void @test_fsub(float *%a0, doubl
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsub %st(0), %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsub %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fsubs (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: fsubl (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -4762,6 +5210,18 @@ define void @test_fsubp_fisub(i16 *%a0,
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsubp_fisub:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsubp %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsubp %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fisubs (%ecx) # sched: [13:2.00]
+; BDVER2-NEXT: fisubl (%eax) # sched: [13:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsubp_fisub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -4886,6 +5346,18 @@ define void @test_fsubr(float *%a0, doub
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsubr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsubr %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fsubrs (%ecx) # sched: [10:1.00]
+; BDVER2-NEXT: fsubrl (%eax) # sched: [10:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsubr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -5010,6 +5482,18 @@ define void @test_fsubrp_fisubr(i16 *%a0
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fsubrp_fisubr:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fsubrp %st(1) # sched: [3:1.00]
+; BDVER2-NEXT: fsubrp %st(2) # sched: [3:1.00]
+; BDVER2-NEXT: fisubrs (%ecx) # sched: [13:2.00]
+; BDVER2-NEXT: fisubrl (%eax) # sched: [13:2.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fsubrp_fisubr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -5094,6 +5578,13 @@ define void @test_ftst() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_ftst:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: ftst # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_ftst:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5200,6 +5691,17 @@ define void @test_fucom_fucomp_fucompp()
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fucom_fucomp_fucompp:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fucom %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fucom %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fucomp %st(1) # sched: [1:1.00]
+; BDVER2-NEXT: fucomp %st(3) # sched: [1:1.00]
+; BDVER2-NEXT: fucompp # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fucom_fucomp_fucompp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5290,6 +5792,14 @@ define void @test_fucomi_fucomip() optsi
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fucomi_fucomip:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fucomi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: fucompi %st(3) # sched: [3:1.00]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fucomi_fucomip:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5366,6 +5876,13 @@ define void @test_fwait() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fwait:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: wait # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fwait:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5440,6 +5957,13 @@ define void @test_fxam() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxam:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxam # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxam:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5522,6 +6046,14 @@ define void @test_fxch() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxch:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxch %st(1) # sched: [1:0.33]
+; BDVER2-NEXT: fxch %st(3) # sched: [1:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxch:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5614,6 +6146,15 @@ define void @test_fxrstor_fxsave(i8* %a0
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxrstor_fxsave:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxrstor (%eax) # sched: [5:2.00]
+; BDVER2-NEXT: fxsave (%eax) # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxrstor_fxsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@@ -5692,6 +6233,13 @@ define void @test_fxtract() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fxtract:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fxtract # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fxtract:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5766,6 +6314,13 @@ define void @test_fyl2x() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fyl2x:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fyl2x # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fyl2x:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@@ -5840,6 +6395,13 @@ define void @test_fyl2xp1() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
+; BDVER2-LABEL: test_fyl2xp1:
+; BDVER2: # %bb.0:
+; BDVER2-NEXT: #APP
+; BDVER2-NEXT: fyl2xp1 # sched: [100:0.33]
+; BDVER2-NEXT: #NO_APP
+; BDVER2-NEXT: retl # sched: [6:1.00]
+;
; BTVER2-LABEL: test_fyl2xp1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=345462&r1=345461&r2=345462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Sat Oct 27 13:36:11 2018
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
@@ -17,16 +17,38 @@ define void @test_vfrczpd(<2 x double> %
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczpd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczpd %xmm0, %xmm0
-; BDVER-NEXT: vfrczpd %ymm1, %ymm1
-; BDVER-NEXT: vfrczpd (%rdi), %xmm0
-; BDVER-NEXT: vfrczpd (%rsi), %ymm1
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczpd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczpd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczpd %xmm0, %xmm0
+; BDVER3-NEXT: vfrczpd %ymm1, %ymm1
+; BDVER3-NEXT: vfrczpd (%rdi), %xmm0
+; BDVER3-NEXT: vfrczpd (%rsi), %ymm1
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczpd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczpd %xmm0, %xmm0
+; BDVER4-NEXT: vfrczpd %ymm1, %ymm1
+; BDVER4-NEXT: vfrczpd (%rdi), %xmm0
+; BDVER4-NEXT: vfrczpd (%rsi), %ymm1
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczpd $0, $0 \0a\09 vfrczpd $1, $1 \0a\09 vfrczpd $2, $0 \0a\09 vfrczpd $3, $1", "x,x,*m,*m"(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, <4 x double> *%a3)
ret void
}
@@ -43,16 +65,38 @@ define void @test_vfrczps(<4 x float> %a
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczps:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczps %xmm0, %xmm0
-; BDVER-NEXT: vfrczps %ymm1, %ymm1
-; BDVER-NEXT: vfrczps (%rdi), %xmm0
-; BDVER-NEXT: vfrczps (%rsi), %ymm1
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczps:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczps:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczps %xmm0, %xmm0
+; BDVER3-NEXT: vfrczps %ymm1, %ymm1
+; BDVER3-NEXT: vfrczps (%rdi), %xmm0
+; BDVER3-NEXT: vfrczps (%rsi), %ymm1
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczps:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczps %xmm0, %xmm0
+; BDVER4-NEXT: vfrczps %ymm1, %ymm1
+; BDVER4-NEXT: vfrczps (%rdi), %xmm0
+; BDVER4-NEXT: vfrczps (%rsi), %ymm1
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczps $0, $0 \0a\09 vfrczps $1, $1 \0a\09 vfrczps $2, $0 \0a\09 vfrczps $3, $1", "x,x,*m,*m"(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, <4 x double> *%a3)
ret void
}
@@ -66,13 +110,29 @@ define void @test_vfrczsd(<2 x double> %
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczsd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczsd %xmm0, %xmm0
-; BDVER-NEXT: vfrczsd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczsd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczsd (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczsd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczsd %xmm0, %xmm0
+; BDVER3-NEXT: vfrczsd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczsd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczsd %xmm0, %xmm0
+; BDVER4-NEXT: vfrczsd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczsd $0, $0 \0a\09 vfrczsd $1, $0", "x,*m"(<2 x double> %a0, <2 x double> *%a1)
ret void
}
@@ -86,13 +146,29 @@ define void @test_vfrczss(<4 x float> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vfrczss:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vfrczss %xmm0, %xmm0
-; BDVER-NEXT: vfrczss (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vfrczss:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00]
+; BDVER12-NEXT: vfrczss (%rdi), %xmm0 # sched: [9:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vfrczss:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vfrczss %xmm0, %xmm0
+; BDVER3-NEXT: vfrczss (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vfrczss:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vfrczss %xmm0, %xmm0
+; BDVER4-NEXT: vfrczss (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vfrczss $0, $0 \0a\09 vfrczss $1, $0", "x,*m"(<4 x float> %a0, <4 x double> *%a1)
ret void
}
@@ -107,14 +183,32 @@ define void @test_vpcmov_128(<2 x i64> %
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcmov_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcmov_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcmov_128:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcmov_128:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -130,15 +224,35 @@ define void @test_vpcmov_256(<4 x i64> %
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcmov_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcmov_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcmov_256:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcmov_256:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i64> *%a3)
ret void
}
@@ -158,19 +272,47 @@ define void @test_vpcom(<2 x i64> %a0, <
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcom:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcom:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcom:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcom:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcomb $3, $1, $0, $0 \0a\09 vpcomd $3, $1, $0, $0 \0a\09 vpcomq $3, $1, $0, $0 \0a\09 vpcomw $3, $1, $0, $0 \0a\09 vpcomb $3, $2, $0, $0 \0a\09 vpcomd $3, $2, $0, $0 \0a\09 vpcomq $3, $2, $0, $0 \0a\09 vpcomw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3)
ret void
}
@@ -190,19 +332,47 @@ define void @test_vpcomu(<2 x i64> %a0,
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpcomu:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpcomu:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpcomu:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpcomu:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpcomub $3, $1, $0, $0 \0a\09 vpcomud $3, $1, $0, $0 \0a\09 vpcomuq $3, $1, $0, $0 \0a\09 vpcomuw $3, $1, $0, $0 \0a\09 vpcomub $3, $2, $0, $0 \0a\09 vpcomud $3, $2, $0, $0 \0a\09 vpcomuq $3, $2, $0, $0 \0a\09 vpcomuw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3)
ret void
}
@@ -217,14 +387,32 @@ define void @test_vpermil2pd_128(<2 x do
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2pd_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2pd_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2pd_128:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2pd_128:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3, i8 3)
ret void
}
@@ -240,15 +428,35 @@ define void @test_vpermil2pd_256(<4 x do
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2pd_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2pd_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2pd_256:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2pd_256:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3, i8 3)
ret void
}
@@ -263,14 +471,32 @@ define void @test_vpermil2ps_128(<4 x fl
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2ps_128:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2ps_128:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2ps_128:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2ps_128:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3, i8 3)
ret void
}
@@ -286,15 +512,35 @@ define void @test_vpermil2ps_256(<8 x fl
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpermil2ps_256:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: vzeroupper
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpermil2ps_256:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: vzeroupper # sched: [100:0.33]
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpermil2ps_256:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER3-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: vzeroupper
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpermil2ps_256:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
+; BDVER4-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: vzeroupper
+; BDVER4-NEXT: retq
call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3, i8 3)
ret void
}
@@ -308,13 +554,29 @@ define void @test_vphaddbd(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddbd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddbd %xmm0, %xmm0
-; BDVER-NEXT: vphaddbd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddbd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddbd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddbd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddbd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddbd %xmm0, %xmm0
+; BDVER3-NEXT: vphaddbd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddbd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddbd %xmm0, %xmm0
+; BDVER4-NEXT: vphaddbd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddbd $0, $0 \0a\09 vphaddbd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -328,13 +590,29 @@ define void @test_vphaddbq(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddbq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddbq %xmm0, %xmm0
-; BDVER-NEXT: vphaddbq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddbq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddbq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddbq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddbq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddbq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddbq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddbq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddbq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddbq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddbq $0, $0 \0a\09 vphaddbq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -348,13 +626,29 @@ define void @test_vphaddbw(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddbw:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddbw %xmm0, %xmm0
-; BDVER-NEXT: vphaddbw (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddbw:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddbw %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddbw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddbw:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddbw %xmm0, %xmm0
+; BDVER3-NEXT: vphaddbw (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddbw:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddbw %xmm0, %xmm0
+; BDVER4-NEXT: vphaddbw (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddbw $0, $0 \0a\09 vphaddbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -368,13 +662,29 @@ define void @test_vphadddq(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphadddq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphadddq %xmm0, %xmm0
-; BDVER-NEXT: vphadddq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphadddq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphadddq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphadddq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphadddq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphadddq %xmm0, %xmm0
+; BDVER3-NEXT: vphadddq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphadddq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphadddq %xmm0, %xmm0
+; BDVER4-NEXT: vphadddq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphadddq $0, $0 \0a\09 vphadddq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -388,13 +698,29 @@ define void @test_vphaddubd(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddubd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddubd %xmm0, %xmm0
-; BDVER-NEXT: vphaddubd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddubd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddubd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddubd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddubd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddubd %xmm0, %xmm0
+; BDVER3-NEXT: vphaddubd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddubd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddubd %xmm0, %xmm0
+; BDVER4-NEXT: vphaddubd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddubd $0, $0 \0a\09 vphaddubd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -408,13 +734,29 @@ define void @test_vphaddubq(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddubq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddubq %xmm0, %xmm0
-; BDVER-NEXT: vphaddubq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddubq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddubq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddubq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddubq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddubq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddubq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddubq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddubq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddubq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddubq $0, $0 \0a\09 vphaddubq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -428,13 +770,29 @@ define void @test_vphaddubw(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddubw:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddubw %xmm0, %xmm0
-; BDVER-NEXT: vphaddubw (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddubw:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddubw %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddubw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddubw:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddubw %xmm0, %xmm0
+; BDVER3-NEXT: vphaddubw (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddubw:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddubw %xmm0, %xmm0
+; BDVER4-NEXT: vphaddubw (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddubw $0, $0 \0a\09 vphaddubw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -448,13 +806,29 @@ define void @test_vphaddudq(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddudq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddudq %xmm0, %xmm0
-; BDVER-NEXT: vphaddudq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddudq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddudq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddudq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddudq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddudq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddudq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddudq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddudq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddudq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddudq $0, $0 \0a\09 vphaddudq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -468,13 +842,29 @@ define void @test_vphadduwd(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphadduwd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphadduwd %xmm0, %xmm0
-; BDVER-NEXT: vphadduwd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphadduwd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphadduwd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphadduwd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphadduwd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphadduwd %xmm0, %xmm0
+; BDVER3-NEXT: vphadduwd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphadduwd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphadduwd %xmm0, %xmm0
+; BDVER4-NEXT: vphadduwd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphadduwd $0, $0 \0a\09 vphadduwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -488,13 +878,29 @@ define void @test_vphadduwq(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphadduwq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphadduwq %xmm0, %xmm0
-; BDVER-NEXT: vphadduwq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphadduwq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphadduwq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphadduwq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphadduwq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphadduwq %xmm0, %xmm0
+; BDVER3-NEXT: vphadduwq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphadduwq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphadduwq %xmm0, %xmm0
+; BDVER4-NEXT: vphadduwq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphadduwq $0, $0 \0a\09 vphadduwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -508,13 +914,29 @@ define void @test_vphaddwd(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddwd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddwd %xmm0, %xmm0
-; BDVER-NEXT: vphaddwd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddwd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddwd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddwd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddwd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddwd %xmm0, %xmm0
+; BDVER3-NEXT: vphaddwd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddwd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddwd %xmm0, %xmm0
+; BDVER4-NEXT: vphaddwd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddwd $0, $0 \0a\09 vphaddwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -528,13 +950,29 @@ define void @test_vphaddwq(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphaddwq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphaddwq %xmm0, %xmm0
-; BDVER-NEXT: vphaddwq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphaddwq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphaddwq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphaddwq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphaddwq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphaddwq %xmm0, %xmm0
+; BDVER3-NEXT: vphaddwq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphaddwq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphaddwq %xmm0, %xmm0
+; BDVER4-NEXT: vphaddwq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphaddwq $0, $0 \0a\09 vphaddwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -548,13 +986,29 @@ define void @test_vphsubbw(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphsubbw:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphsubbw %xmm0, %xmm0
-; BDVER-NEXT: vphsubbw (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphsubbw:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphsubbw %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphsubbw (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphsubbw:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphsubbw %xmm0, %xmm0
+; BDVER3-NEXT: vphsubbw (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphsubbw:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphsubbw %xmm0, %xmm0
+; BDVER4-NEXT: vphsubbw (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphsubbw $0, $0 \0a\09 vphsubbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -568,13 +1022,29 @@ define void @test_vphsubdq(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphsubdq:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphsubdq %xmm0, %xmm0
-; BDVER-NEXT: vphsubdq (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphsubdq:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphsubdq %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphsubdq (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphsubdq:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphsubdq %xmm0, %xmm0
+; BDVER3-NEXT: vphsubdq (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphsubdq:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphsubdq %xmm0, %xmm0
+; BDVER4-NEXT: vphsubdq (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphsubdq $0, $0 \0a\09 vphsubdq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -588,13 +1058,29 @@ define void @test_vphsubwd(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vphsubwd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vphsubwd %xmm0, %xmm0
-; BDVER-NEXT: vphsubwd (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vphsubwd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vphsubwd %xmm0, %xmm0 # sched: [3:1.50]
+; BDVER12-NEXT: vphsubwd (%rdi), %xmm0 # sched: [9:1.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vphsubwd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vphsubwd %xmm0, %xmm0
+; BDVER3-NEXT: vphsubwd (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vphsubwd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vphsubwd %xmm0, %xmm0
+; BDVER4-NEXT: vphsubwd (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vphsubwd $0, $0 \0a\09 vphsubwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
ret void
}
@@ -608,13 +1094,29 @@ define void @test_vpmacsdd(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsdd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsdd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsdd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsdd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsdd $2, $1, $0, $0 \0a\09 vpmacsdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -628,13 +1130,29 @@ define void @test_vpmacsdqh(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsdqh:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsdqh:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsdqh:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsdqh:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsdqh $2, $1, $0, $0 \0a\09 vpmacsdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -648,13 +1166,29 @@ define void @test_vpmacsdql(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsdql:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsdql:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsdql:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsdql:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsdql $2, $1, $0, $0 \0a\09 vpmacsdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -668,13 +1202,29 @@ define void @test_vpmacssdd(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssdd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssdd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssdd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssdd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssdd $2, $1, $0, $0 \0a\09 vpmacssdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -688,13 +1238,29 @@ define void @test_vpmacssdqh(<2 x i64> %
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssdqh:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssdqh:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssdqh:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssdqh:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssdqh $2, $1, $0, $0 \0a\09 vpmacssdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -708,13 +1274,29 @@ define void @test_vpmacssdql(<2 x i64> %
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssdql:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssdql:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssdql:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssdql:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssdql $2, $1, $0, $0 \0a\09 vpmacssdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -728,13 +1310,29 @@ define void @test_vpmacsswd(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsswd $2, $1, $0, $0 \0a\09 vpmacsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -748,13 +1346,29 @@ define void @test_vpmacssww(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacssww:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacssww:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacssww:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacssww:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacssww $2, $1, $0, $0 \0a\09 vpmacssww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -768,13 +1382,29 @@ define void @test_vpmacswd(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacswd $2, $1, $0, $0 \0a\09 vpmacswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -788,13 +1418,29 @@ define void @test_vpmacsww(<2 x i64> %a0
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmacsww:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmacsww:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmacsww:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmacsww:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmacsww $2, $1, $0, $0 \0a\09 vpmacsww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -808,13 +1454,29 @@ define void @test_vpmadcsswd(<2 x i64> %
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmadcsswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmadcsswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmadcsswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmadcsswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmadcsswd $2, $1, $0, $0 \0a\09 vpmadcsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -828,13 +1490,29 @@ define void @test_vpmadcswd(<2 x i64> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpmadcswd:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpmadcswd:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; BDVER12-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpmadcswd:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpmadcswd:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpmadcswd $2, $1, $0, $0 \0a\09 vpmadcswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -849,14 +1527,32 @@ define void @test_vpperm(<2 x i64> %a0,
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpperm:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpperm:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; BDVER12-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpperm:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpperm:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpperm $2, $1, $0, $0 \0A\09 vpperm $3, $1, $0, $0 \0A\09 vpperm $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
ret void
}
@@ -888,31 +1584,83 @@ define void @test_vprot(<2 x i64> %a0, <
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vprot:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vprotb %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotd %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotq %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotw %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vprotb (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotd (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotq (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotw (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vprotb %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotd %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotq %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotw %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vprotb $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotd $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotq $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotw $7, %xmm0, %xmm0
-; BDVER-NEXT: vprotb $7, (%rdi), %xmm0
-; BDVER-NEXT: vprotd $7, (%rdi), %xmm0
-; BDVER-NEXT: vprotq $7, (%rdi), %xmm0
-; BDVER-NEXT: vprotw $7, (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vprot:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vprot:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vprotb %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotq %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotw %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vprotb (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotd (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotq (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotw (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vprotb %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotd %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotq %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotw %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vprotb $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotd $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotq $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotw $7, %xmm0, %xmm0
+; BDVER3-NEXT: vprotb $7, (%rdi), %xmm0
+; BDVER3-NEXT: vprotd $7, (%rdi), %xmm0
+; BDVER3-NEXT: vprotq $7, (%rdi), %xmm0
+; BDVER3-NEXT: vprotw $7, (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vprot:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vprotb %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotq %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotw %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vprotb (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotd (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotq (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotw (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vprotb %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotd %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotq %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotw %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vprotb $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotd $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotq $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotw $7, %xmm0, %xmm0
+; BDVER4-NEXT: vprotb $7, (%rdi), %xmm0
+; BDVER4-NEXT: vprotd $7, (%rdi), %xmm0
+; BDVER4-NEXT: vprotq $7, (%rdi), %xmm0
+; BDVER4-NEXT: vprotw $7, (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vprotb $1, $0, $0 \0A\09 vprotd $1, $0, $0 \0A\09 vprotq $1, $0, $0 \0A\09 vprotw $1, $0, $0 \0A\09 vprotb $2, $0, $0 \0A\09 vprotd $2, $0, $0 \0A\09 vprotq $2, $0, $0 \0A\09 vprotw $2, $0, $0 \0A\09 vprotb $0, $2, $0 \0A\09 vprotd $0, $2, $0 \0A\09 vprotq $0, $2, $0 \0A\09 vprotw $0, $2, $0 \0A\09 vprotb $3, $0, $0 \0A\09 vprotd $3, $0, $0 \0A\09 vprotq $3, $0, $0 \0A\09 vprotw $3, $0, $0 \0A\09 vprotb $3, $2, $0 \0A\09 vprotd $3, $2, $0 \0A\09 vprotq $3, $2, $0 \0A\09 vprotw $3, $2, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 7)
ret void
}
@@ -936,23 +1684,59 @@ define void @test_vpsha(<2 x i64> %a0, <
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpsha:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpshab %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshad %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshaq %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshaw %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshab (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshad (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshaq (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshaw (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshab %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshad %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshaq %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshaw %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpsha:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpsha:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshad %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshab (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshad (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshaq (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshaw (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshab %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshad %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshaq %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshaw %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpsha:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshad %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshab (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshad (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshaq (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshaw (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshab %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshad %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshaq %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshaw %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpshab $1, $0, $0 \0A\09 vpshad $1, $0, $0 \0A\09 vpshaq $1, $0, $0 \0A\09 vpshaw $1, $0, $0 \0A\09 vpshab $2, $0, $0 \0A\09 vpshad $2, $0, $0 \0A\09 vpshaq $2, $0, $0 \0A\09 vpshaw $2, $0, $0 \0A\09 vpshab $0, $2, $0 \0A\09 vpshad $0, $2, $0 \0A\09 vpshaq $0, $2, $0 \0A\09 vpshaw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
ret void
}
@@ -976,23 +1760,59 @@ define void @test_vpshl(<2 x i64> %a0, <
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; BDVER-LABEL: test_vpshl:
-; BDVER: # %bb.0:
-; BDVER-NEXT: #APP
-; BDVER-NEXT: vpshlb %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshld %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshlq %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshlw %xmm1, %xmm0, %xmm0
-; BDVER-NEXT: vpshlb (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshld (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshlq (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshlw (%rdi), %xmm0, %xmm0
-; BDVER-NEXT: vpshlb %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshld %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshlq %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: vpshlw %xmm0, (%rdi), %xmm0
-; BDVER-NEXT: #NO_APP
-; BDVER-NEXT: retq
+; BDVER12-LABEL: test_vpshl:
+; BDVER12: # %bb.0:
+; BDVER12-NEXT: #APP
+; BDVER12-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; BDVER12-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
+; BDVER12-NEXT: #NO_APP
+; BDVER12-NEXT: retq # sched: [1:1.00]
+;
+; BDVER3-LABEL: test_vpshl:
+; BDVER3: # %bb.0:
+; BDVER3-NEXT: #APP
+; BDVER3-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; BDVER3-NEXT: vpshlb (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshld (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshlq (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshlw (%rdi), %xmm0, %xmm0
+; BDVER3-NEXT: vpshlb %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshld %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshlq %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: vpshlw %xmm0, (%rdi), %xmm0
+; BDVER3-NEXT: #NO_APP
+; BDVER3-NEXT: retq
+;
+; BDVER4-LABEL: test_vpshl:
+; BDVER4: # %bb.0:
+; BDVER4-NEXT: #APP
+; BDVER4-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; BDVER4-NEXT: vpshlb (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshld (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshlq (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshlw (%rdi), %xmm0, %xmm0
+; BDVER4-NEXT: vpshlb %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshld %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshlq %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: vpshlw %xmm0, (%rdi), %xmm0
+; BDVER4-NEXT: #NO_APP
+; BDVER4-NEXT: retq
call void asm sideeffect "vpshlb $1, $0, $0 \0A\09 vpshld $1, $0, $0 \0A\09 vpshlq $1, $0, $0 \0A\09 vpshlw $1, $0, $0 \0A\09 vpshlb $2, $0, $0 \0A\09 vpshld $2, $0, $0 \0A\09 vpshlq $2, $0, $0 \0A\09 vpshlw $2, $0, $0 \0A\09 vpshlb $0, $2, $0 \0A\09 vpshld $0, $2, $0 \0A\09 vpshlq $0, $2, $0 \0A\09 vpshlw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
ret void
}
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1000 -timeline < %s | FileCheck %s
+
+add %eax, %ecx
+add %esi, %eax
+add %eax, %edx
+
+# CHECK: Iterations: 1000
+# CHECK-NEXT: Instructions: 3000
+# CHECK-NEXT: Total Cycles: 1004
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.99
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addl %eax, %ecx
+# CHECK-NEXT: 1 1 0.33 addl %esi, %eax
+# CHECK-NEXT: 1 1 0.33 addl %eax, %edx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - addl %eax, %ecx
+# CHECK-NEXT: - - - - - 1.00 - - addl %esi, %eax
+# CHECK-NEXT: - - 1.00 - - - - - addl %eax, %edx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . addl %eax, %ecx
+# CHECK-NEXT: [0,1] DeER . . . addl %esi, %eax
+# CHECK-NEXT: [0,2] D=eER. . . addl %eax, %edx
+# CHECK-NEXT: [1,0] D=eER. . . addl %eax, %ecx
+# CHECK-NEXT: [1,1] .DeER. . . addl %esi, %eax
+# CHECK-NEXT: [1,2] .D=eER . . addl %eax, %edx
+# CHECK-NEXT: [2,0] .D=eER . . addl %eax, %ecx
+# CHECK-NEXT: [2,1] .D=eER . . addl %esi, %eax
+# CHECK-NEXT: [2,2] . D=eER . . addl %eax, %edx
+# CHECK-NEXT: [3,0] . D=eER . . addl %eax, %ecx
+# CHECK-NEXT: [3,1] . D=eER . . addl %esi, %eax
+# CHECK-NEXT: [3,2] . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [4,0] . D=eER . . addl %eax, %ecx
+# CHECK-NEXT: [4,1] . D=eER . . addl %esi, %eax
+# CHECK-NEXT: [4,2] . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [5,0] . D==eER . . addl %eax, %ecx
+# CHECK-NEXT: [5,1] . D=eER . . addl %esi, %eax
+# CHECK-NEXT: [5,2] . D==eER. . addl %eax, %edx
+# CHECK-NEXT: [6,0] . D==eER. . addl %eax, %ecx
+# CHECK-NEXT: [6,1] . D==eER. . addl %esi, %eax
+# CHECK-NEXT: [6,2] . D==eER . addl %eax, %edx
+# CHECK-NEXT: [7,0] . D==eER . addl %eax, %ecx
+# CHECK-NEXT: [7,1] . D==eER . addl %esi, %eax
+# CHECK-NEXT: [7,2] . D===eER . addl %eax, %edx
+# CHECK-NEXT: [8,0] . .D==eER . addl %eax, %ecx
+# CHECK-NEXT: [8,1] . .D==eER . addl %esi, %eax
+# CHECK-NEXT: [8,2] . .D===eER. addl %eax, %edx
+# CHECK-NEXT: [9,0] . .D===eER. addl %eax, %ecx
+# CHECK-NEXT: [9,1] . . D==eER. addl %esi, %eax
+# CHECK-NEXT: [9,2] . . D===eER addl %eax, %edx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 10 2.5 0.1 0.0 addl %eax, %ecx
+# CHECK-NEXT: 1. 10 2.2 0.1 0.0 addl %esi, %eax
+# CHECK-NEXT: 2. 10 3.0 0.0 0.0 addl %eax, %edx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+## Sets register RAX.
+imulq $5, %rcx, %rax
+
+## Kills the previous definition of RAX.
+## The upper portion of RAX is cleared.
+lzcnt %ecx, %eax
+
+## The AND can start immediately after the LZCNT.
+## It doesn't need to wait for the IMUL.
+and %rcx, %rax
+bsf %rax, %rcx
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 803
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulq $5, %rcx, %rax
+# CHECK-NEXT: 1 3 1.00 lzcntl %ecx, %eax
+# CHECK-NEXT: 1 1 0.33 andq %rcx, %rax
+# CHECK-NEXT: 1 3 1.00 bsfq %rax, %rcx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . imulq $5, %rcx, %rax
+# CHECK-NEXT: [0,1] D=eeeER . . . lzcntl %ecx, %eax
+# CHECK-NEXT: [0,2] D====eER . . . andq %rcx, %rax
+# CHECK-NEXT: [0,3] D=====eeeER . . bsfq %rax, %rcx
+# CHECK-NEXT: [1,0] .D=======eeeER . . imulq $5, %rcx, %rax
+# CHECK-NEXT: [1,1] .D========eeeER. . lzcntl %ecx, %eax
+# CHECK-NEXT: [1,2] .D===========eER . andq %rcx, %rax
+# CHECK-NEXT: [1,3] .D============eeeER bsfq %rax, %rcx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 4.5 0.5 0.0 imulq $5, %rcx, %rax
+# CHECK-NEXT: 1. 2 5.5 1.5 0.0 lzcntl %ecx, %eax
+# CHECK-NEXT: 2. 2 8.5 0.0 0.0 andq %rcx, %rax
+# CHECK-NEXT: 3. 2 9.5 0.0 0.0 bsfq %rax, %rcx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,137 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+# In this test, the VDIVPS takes 38 cycles to write to register YMM3. The first
+# VADDPS does not depend on the VDIVPS (the WAW dependency is eliminated at
+# register renaming stage). So the first VADDPS can be executed in parallel to
+# the VDIVPS. That VADDPS also writes to register XMM3, and the upper half of
+# YMM3 is implicitly cleared. As a consequence, the definition of YMM3 from the
+# VDIVPS is killed, and the subsequent VADDPS instructions don't need to wait
+# for the VDIVPS to complete.
+# The block reciprocal throughput is limited by the VDIVPS reciprocal throughput
+# (which is 38 cycles). The sequence of VADDPS can be executed in parallel on
+# the FPA unit; their latency is "hidden" by the long latency of the VDIVPS.
+
+vdivps %ymm0, %ymm1, %ymm3
+vaddps %xmm0, %xmm1, %xmm3
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vaddps %ymm3, %ymm1, %ymm4
+vandps %xmm4, %xmm1, %xmm0
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1800
+# CHECK-NEXT: Total Cycles: 2804
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.64
+# CHECK-NEXT: Block RThroughput: 28.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 29 28.00 vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 1 1 1.00 vandps %xmm4, %xmm1, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: [0,1] DeeeE--------------------------R . . . . . . vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: [0,2] .D==eeeE-----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,3] .D===eeeE----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,4] .D====eeeE---------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,5] .D=====eeeE--------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,6] . D=====eeeE-------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,7] . D======eeeE------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,8] . D=======eeeE-----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,9] . D========eeeE----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,10] . D========eeeE---------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,11] . D=========eeeE--------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,12] . D==========eeeE-------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,13] . D===========eeeE------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,14] . D===========eeeE-----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,15] . D============eeeE----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,16] . D=============eeeE---------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [0,17] . D================eE--------R . . . . . . vandps %xmm4, %xmm1, %xmm0
+# CHECK-NEXT: [1,0] . D=======================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: [1,1] . D================eeeE---------------------------------R vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: [1,2] . .D==================eeeE------------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,3] . .D===================eeeE-----------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,4] . .D====================eeeE----------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,5] . .D=====================eeeE---------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,6] . . D=====================eeeE--------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,7] . . D======================eeeE-------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,8] . . D=======================eeeE------------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,9] . . D========================eeeE-----------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,10] . . D========================eeeE----------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,11] . . D=========================eeeE---------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,12] . . D==========================eeeE--------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,13] . . D===========================eeeE-------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,14] . . D===========================eeeE------------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,15] . . D============================eeeE-----------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,16] . . D=============================eeeE----------------R vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: [1,17] . . D================================eE---------------R vandps %xmm4, %xmm1, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 12.5 4.0 0.0 vdivps %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 1. 2 9.0 0.5 29.5 vaddps %xmm0, %xmm1, %xmm3
+# CHECK-NEXT: 2. 2 11.0 0.0 26.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 3. 2 12.0 1.0 25.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 4. 2 13.0 2.0 24.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 5. 2 14.0 3.0 23.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 6. 2 14.0 4.0 22.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 7. 2 15.0 5.0 21.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 8. 2 16.0 6.0 20.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 9. 2 17.0 7.0 19.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 10. 2 17.0 8.0 18.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 11. 2 18.0 9.0 17.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 12. 2 19.0 10.0 16.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 13. 2 20.0 11.0 15.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 14. 2 20.0 12.0 14.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 15. 2 21.0 13.0 13.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 16. 2 22.0 14.0 12.5 vaddps %ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 17. 2 25.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,72 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# Perf stat reports an IPC of 1.97 for this block of code.
+
+# The CMP instruction doesn't depend on the value of EAX. It can set the flags
+# without having to read the inputs.
+
+cmp %eax, %eax
+cmovae %ebx, %eax
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 3000
+# CHECK-NEXT: Total Cycles: 4503
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.67
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 cmpl %eax, %eax
+# CHECK-NEXT: 2 2 0.67 cmovael %ebx, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - 1.00 - - cmpl %eax, %eax
+# CHECK-NEXT: - - 1.00 1.00 - - - - cmovael %ebx, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . .. cmpl %eax, %eax
+# CHECK-NEXT: [0,1] D=eeER .. cmovael %ebx, %eax
+# CHECK-NEXT: [1,0] D===eER .. cmpl %eax, %eax
+# CHECK-NEXT: [1,1] .D===eeER .. cmovael %ebx, %eax
+# CHECK-NEXT: [2,0] .D=====eER.. cmpl %eax, %eax
+# CHECK-NEXT: [2,1] . D=====eeER cmovael %ebx, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.7 0.3 0.0 cmpl %eax, %eax
+# CHECK-NEXT: 1. 3 4.0 0.0 0.0 cmovael %ebx, %eax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports an IPC of 2.00 for this block of code.
+
+# All of the vector packed compares from this test are dependency breaking
+# instructions. That means, there is no RAW dependency between any of the
+# instructions, and the code can be fully parallelized in hardware.
+
+vpcmpeqb %xmm0, %xmm0, %xmm1
+vpcmpeqw %xmm1, %xmm1, %xmm2
+vpcmpeqd %xmm2, %xmm2, %xmm3
+vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 6000
+# CHECK-NEXT: Total Cycles: 6003
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 2.00 - 2.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] D=eER. . . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] D==eER . . vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [0,3] D===eER . . vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [1,0] .D===eER . . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] .D====eER . . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] .D=====eER. . vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,3] .D======eER . vpcmpeqq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [2,0] . D======eER . vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] . D=======eER . vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [2,2] . D========eER. vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,3] . D=========eER vpcmpeqq %xmm3, %xmm3, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 4.0 0.3 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 3 5.0 0.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 2. 3 6.0 0.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 3. 3 7.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports an IPC of 2.00 for this block of code.
+
+# All of the vector packed compares from this test are zero idioms. These zero
+# idioms are all detected and removed by the register renamer. That means, no
+# uOp is executed, and there is no RAW dependency for any of the packed
+# compares.
+
+vpcmpgtb %xmm0, %xmm0, %xmm1
+vpcmpgtw %xmm1, %xmm1, %xmm2
+vpcmpgtd %xmm2, %xmm2, %xmm3
+vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 6000
+# CHECK-NEXT: Total Cycles: 1501
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 4.00
+# CHECK-NEXT: IPC: 4.00
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: - - - - - - - - vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123
+
+# CHECK: [0,0] DR . vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] DR . vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] DR . vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [0,3] DR . vpcmpgtq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [1,0] .DR. vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] .DR. vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,2] .DR. vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,3] .DR. vpcmpgtq %xmm3, %xmm3, %xmm0
+# CHECK-NEXT: [2,0] . DR vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] . DR vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [2,2] . DR vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,3] . DR vpcmpgtq %xmm3, %xmm3, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 0.0 0.0 0.0 vpcmpgtb %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 3 0.0 0.0 0.0 vpcmpgtw %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: 2. 3 0.0 0.0 0.0 vpcmpgtd %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 3. 3 0.0 0.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports an IPC of 1.00 for this code block.
+
+# Although both SBB are dependency breaking instructions, there is still an
+# implicit dependency on EFLAGS which limits the ILP. So, the hardware backend
+# can only execute one instruction per cycle.
+
+sbb %edx, %edx
+sbb %eax, %eax
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 3000
+# CHECK-NEXT: Total Cycles: 6003
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 2 0.67 sbbl %edx, %edx
+# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %edx, %edx
+# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %eax, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . sbbl %edx, %edx
+# CHECK-NEXT: [0,1] D==eeER . . sbbl %eax, %eax
+# CHECK-NEXT: [1,0] .D===eeER . . sbbl %edx, %edx
+# CHECK-NEXT: [1,1] .D=====eeER . sbbl %eax, %eax
+# CHECK-NEXT: [2,0] . D======eeER . sbbl %edx, %edx
+# CHECK-NEXT: [2,1] . D========eeER sbbl %eax, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 4.0 0.3 0.0 sbbl %edx, %edx
+# CHECK-NEXT: 1. 3 6.0 0.0 0.0 sbbl %eax, %eax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
+
+# perf stat reports a throughput of 1.51 IPC for this block of code.
+
+# The SBB does not depend on the value of register EAX. That means, it doesn't
+# have to wait for the IMUL to write-back on EAX. However, it still depends on
+# the ADD for EFLAGS.
+
+imul %edx, %eax
+add %edx, %edx
+sbb %eax, %eax
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 7503
+# CHECK-NEXT: Total uOps: 6000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.60
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imull %edx, %eax
+# CHECK-NEXT: 1 1 0.33 addl %edx, %edx
+# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - imull %edx, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %edx
+# CHECK-NEXT: - - 1.00 - - 1.00 - - sbbl %eax, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . imull %edx, %eax
+# CHECK-NEXT: [0,1] DeE--R . . . addl %edx, %edx
+# CHECK-NEXT: [0,2] D===eeER . . . sbbl %eax, %eax
+# CHECK-NEXT: [1,0] .D====eeeER . . imull %edx, %eax
+# CHECK-NEXT: [1,1] .DeE------R . . addl %edx, %edx
+# CHECK-NEXT: [1,2] .D=======eeER . . sbbl %eax, %eax
+# CHECK-NEXT: [2,0] . D========eeeER . imull %edx, %eax
+# CHECK-NEXT: [2,1] . DeE----------R . addl %edx, %edx
+# CHECK-NEXT: [2,2] . D===========eeER sbbl %eax, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 5.0 0.3 0.0 imull %edx, %eax
+# CHECK-NEXT: 1. 3 1.0 0.3 6.0 addl %edx, %edx
+# CHECK-NEXT: 2. 3 8.0 0.0 0.0 sbbl %eax, %eax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=500 -timeline < %s | FileCheck %s
+
+vpmuldq %xmm0, %xmm0, %xmm1
+vpaddd %xmm1, %xmm1, %xmm0
+vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Iterations: 500
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1 1 0.50 vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: - - - 1.00 - - - - vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123
+
+# CHECK: [0,0] DeeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] D=====eER . . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [0,2] D======eER. . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [1,0] D======eeeeeER . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [1,1] .D==========eER. . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [1,2] .D===========eER . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [2,0] .D===========eeeeeER. . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [2,1] .D================eER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [2,2] . D================eER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [3,0] . D================eeeeeER . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [3,1] . D=====================eER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [3,2] . D======================eER . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [4,0] . D=====================eeeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [4,1] . D==========================eER . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [4,2] . D===========================eER . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [5,0] . D===========================eeeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [5,1] . D===============================eER . . . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [5,2] . D================================eER. . . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [6,0] . D================================eeeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [6,1] . D=====================================eER. . . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [6,2] . D=====================================eER . . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [7,0] . D=====================================eeeeeER. . . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [7,1] . D==========================================eER . . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [7,2] . D===========================================eER . . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [8,0] . .D==========================================eeeeeER . . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [8,1] . .D===============================================eER . . vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [8,2] . .D================================================eER . . vpaddd %xmm0, %xmm0, %xmm3
+# CHECK-NEXT: [9,0] . .D================================================eeeeeER . vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [9,1] . . D====================================================eER. vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: [9,2] . . D=====================================================eER vpaddd %xmm0, %xmm0, %xmm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 10 25.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 10 29.7 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0
+# CHECK-NEXT: 2. 10 30.5 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,74 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+vmulps %xmm0, %xmm1, %xmm2
+vhaddps %xmm2, %xmm2, %xmm3
+vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Iterations: 300
+# CHECK-NEXT: Instructions: 900
+# CHECK-NEXT: Total Cycles: 1211
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.73
+# CHECK-NEXT: IPC: 0.74
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 2.00 - 4.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeER . . . . vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [0,2] .D==========eeeeeER . . vhaddps %xmm3, %xmm3, %xmm4
+# CHECK-NEXT: [1,0] .DeeeeeE----------R . . vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1] . D=====eeeeeE----R . . vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [1,2] . D==========eeeeeER . vhaddps %xmm3, %xmm3, %xmm4
+# CHECK-NEXT: [2,0] . DeeeeeE----------R . vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [2,1] . D=====eeeeeE----R . vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: [2,2] . D==========eeeeeER vhaddps %xmm3, %xmm3, %xmm4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.0 1.0 6.7 vmulps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 3 6.0 0.7 2.7 vhaddps %xmm2, %xmm2, %xmm3
+# CHECK-NEXT: 2. 3 11.0 1.0 0.0 vhaddps %xmm3, %xmm3, %xmm4
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+vshufps $0, %xmm0, %xmm1, %xmm1
+vhaddps (%rdi), %xmm1, %xmm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 15
+# CHECK-NEXT: Total uOps: 5
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 4 11 2.00 * vhaddps (%rdi), %xmm1, %xmm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: [0,1] .DeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %xmm1, %xmm2
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+vshufps $0, %xmm0, %xmm1, %xmm1
+vhaddps (%rdi), %ymm1, %ymm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 16
+# CHECK-NEXT: Total uOps: 5
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 4 12 2.00 * vhaddps (%rdi), %ymm1, %ymm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,36 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false < %s | FileCheck %s -check-prefix=ENABLED
+
+vmulps %xmm0, %xmm1, %xmm2
+vhaddps %xmm2, %xmm2, %xmm3
+vhaddps %xmm3, %xmm3, %xmm4
+
+# DISABLED-NOT: Instruction Info:
+
+
+# ENABLED: Iterations: 100
+# ENABLED-NEXT: Instructions: 300
+# ENABLED-NEXT: Total Cycles: 414
+# ENABLED-NEXT: Total uOps: 700
+
+
+# ENABLED: Dispatch Width: 4
+# ENABLED-NEXT: uOps Per Cycle: 1.69
+# ENABLED-NEXT: IPC: 0.72
+# ENABLED-NEXT: Block RThroughput: 4.0
+
+# ENABLED: Instruction Info:
+# ENABLED-NEXT: [1]: #uOps
+# ENABLED-NEXT: [2]: Latency
+# ENABLED-NEXT: [3]: RThroughput
+# ENABLED-NEXT: [4]: MayLoad
+# ENABLED-NEXT: [5]: MayStore
+# ENABLED-NEXT: [6]: HasSideEffects (U)
+
+# ENABLED: [1] [2] [3] [4] [5] [6] Instructions:
+# ENABLED-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2
+# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3
+# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,93 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s
+
+vmovaps (%rsi), %xmm0
+vmovaps %xmm0, (%rdi)
+vmovaps 16(%rsi), %xmm0
+vmovaps %xmm0, 16(%rdi)
+vmovaps 32(%rsi), %xmm0
+vmovaps %xmm0, 32(%rdi)
+vmovaps 48(%rsi), %xmm0
+vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2803
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - 4.00 - - 8.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . vmovaps (%rsi), %xmm0
+# CHECK-NEXT: [0,1] D======eER. . . . . vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: [0,3] D=============eER . . . vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: [0,4] .D=============eeeeeeER . . vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: [0,5] .D===================eER . . vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: [0,6] .D====================eeeeeeER. vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: [0,7] .D==========================eER vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 3. 1 14.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 5. 1 20.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 6. 1 21.0 0.0 0.0 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 7. 1 27.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,93 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+vmovaps (%rsi), %xmm0
+vmovaps %xmm0, (%rdi)
+vmovaps 16(%rsi), %xmm0
+vmovaps %xmm0, 16(%rdi)
+vmovaps 32(%rsi), %xmm0
+vmovaps %xmm0, 32(%rdi)
+vmovaps 48(%rsi), %xmm0
+vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 409
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.96
+# CHECK-NEXT: IPC: 1.96
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - - 4.00 - 3.94 4.06
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - 0.97 0.03 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: - - - - - - 0.03 0.97 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: - - - - - - 1.00 - vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . vmovaps (%rsi), %xmm0
+# CHECK-NEXT: [0,1] D======eER. . vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: [0,2] DeeeeeeE-R. . vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: [0,3] D=======eER . vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: [0,4] .DeeeeeeE-R . vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: [0,5] .D=======eER. vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: [0,6] .DeeeeeeE--R. vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: [0,7] .D========eER vmovaps %xmm0, 48(%rdi)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi)
+# CHECK-NEXT: 2. 1 1.0 1.0 1.0 vmovaps 16(%rsi), %xmm0
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
+# CHECK-NEXT: 4. 1 1.0 1.0 1.0 vmovaps 32(%rsi), %xmm0
+# CHECK-NEXT: 5. 1 8.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
+# CHECK-NEXT: 6. 1 1.0 1.0 2.0 vmovaps 48(%rsi), %xmm0
+# CHECK-NEXT: 7. 1 9.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=1 -register-file-stats < %s | FileCheck %s
+
+# These are dependency-breaking one-idioms.
+# Much like zero-idioms, but they produce ones, and do consume resources.
+
+# perf stat reports a throughput of 2.00 IPC.
+
+pcmpeqb %mm2, %mm2
+pcmpeqd %mm2, %mm2
+pcmpeqw %mm2, %mm2
+
+pcmpeqb %xmm2, %xmm2
+pcmpeqd %xmm2, %xmm2
+pcmpeqq %xmm2, %xmm2
+pcmpeqw %xmm2, %xmm2
+
+vpcmpeqb %xmm3, %xmm3, %xmm3
+vpcmpeqd %xmm3, %xmm3, %xmm3
+vpcmpeqq %xmm3, %xmm3, %xmm3
+vpcmpeqw %xmm3, %xmm3, %xmm3
+
+vpcmpeqb %xmm3, %xmm3, %xmm5
+vpcmpeqd %xmm3, %xmm3, %xmm5
+vpcmpeqq %xmm3, %xmm3, %xmm5
+vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# FIXME: the handling of these one-idioms is broken in llvm-mca.
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 903
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.66
+# CHECK-NEXT: IPC: 1.66
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 pcmpeqb %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpeqd %mm2, %mm2
+# CHECK-NEXT: 1 3 1.00 pcmpeqw %mm2, %mm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 1500
+# CHECK-NEXT: Max number of mappings used: 168
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 7.65 - 7.35 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqb %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqd %mm2, %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pcmpeqw %mm2, %mm2
+# CHECK-NEXT: - - - 0.75 - 0.25 - - pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.49 - 0.51 - - pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.64 - 0.36 - - pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.21 - 0.79 - - pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 0.26 - 0.74 - - vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 0.55 - 0.45 - - vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: - - - 0.37 - 0.63 - - vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER .. pcmpeqb %mm2, %mm2
+# CHECK-NEXT: [0,1] D===eeeER .. pcmpeqd %mm2, %mm2
+# CHECK-NEXT: [0,2] D======eeeER pcmpeqw %mm2, %mm2
+# CHECK-NEXT: [0,3] DeE--------R pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: [0,4] .DeE-------R pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: [0,5] .D=eE------R pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: [0,6] .D==eE-----R pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: [0,7] .DeE-------R vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,8] . DeE------R vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,9] . D==eE----R vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,10] . D===eE---R vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,11] . D====eE--R vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,12] . D====eE-R vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,13] . D====eE-R vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,14] . D=====eER vpcmpeqw %xmm3, %xmm3, %xmm5
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 pcmpeqb %mm2, %mm2
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 pcmpeqd %mm2, %mm2
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 pcmpeqw %mm2, %mm2
+# CHECK-NEXT: 3. 1 1.0 1.0 8.0 pcmpeqb %xmm2, %xmm2
+# CHECK-NEXT: 4. 1 1.0 0.0 7.0 pcmpeqd %xmm2, %xmm2
+# CHECK-NEXT: 5. 1 2.0 0.0 6.0 pcmpeqq %xmm2, %xmm2
+# CHECK-NEXT: 6. 1 3.0 0.0 5.0 pcmpeqw %xmm2, %xmm2
+# CHECK-NEXT: 7. 1 1.0 1.0 7.0 vpcmpeqb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 8. 1 1.0 0.0 6.0 vpcmpeqd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 9. 1 3.0 1.0 4.0 vpcmpeqq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 10. 1 4.0 0.0 3.0 vpcmpeqw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 11. 1 5.0 0.0 2.0 vpcmpeqb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 12. 1 5.0 1.0 1.0 vpcmpeqd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 13. 1 5.0 1.0 1.0 vpcmpeqq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 14. 1 6.0 2.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm5
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+imul %rax, %rbx
+lzcnt %ax, %bx
+add %ecx, %ebx
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 8
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulq %rax, %rbx
+# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
+# CHECK-NEXT: 1 1 0.33 addl %ecx, %ebx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . imulq %rax, %rbx
+# CHECK-NEXT: [0,1] D=eeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx
+# CHECK-NEXT: 1. 1 2.0 2.0 0.0 lzcntw %ax, %bx
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 1.00 IPC for this code snippet.
+
+# The ILP is limited by the false dependency on %dx. So, the mov cannot execute
+# in parallel with the add.
+
+add %cx, %dx
+mov %ax, %dx
+xor %bx, %dx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 1504
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 2.99
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addw %cx, %dx
+# CHECK-NEXT: 1 1 0.33 movw %ax, %dx
+# CHECK-NEXT: 1 1 0.33 xorw %bx, %dx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.67 - - 0.33 - - addw %cx, %dx
+# CHECK-NEXT: - - - 0.67 - 0.33 - - movw %ax, %dx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorw %bx, %dx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. addw %cx, %dx
+# CHECK-NEXT: [0,1] DeER .. movw %ax, %dx
+# CHECK-NEXT: [0,2] D=eER.. xorw %bx, %dx
+# CHECK-NEXT: [1,0] D==eER. addw %cx, %dx
+# CHECK-NEXT: [1,1] .DeE-R. movw %ax, %dx
+# CHECK-NEXT: [1,2] .D=eER. xorw %bx, %dx
+# CHECK-NEXT: [2,0] .D==eER addw %cx, %dx
+# CHECK-NEXT: [2,1] .DeE--R movw %ax, %dx
+# CHECK-NEXT: [2,2] . DeE-R xorw %bx, %dx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 2.3 0.3 0.0 addw %cx, %dx
+# CHECK-NEXT: 1. 3 1.0 1.0 1.0 movw %ax, %dx
+# CHECK-NEXT: 2. 3 1.7 0.0 0.3 xorw %bx, %dx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 0.60 IPC for this code snippet.
+
+# The lzcnt cannot execute in parallel with the imul because there is a false
+# dependency on %bx.
+
+imul %ax, %bx
+lzcnt %ax, %bx
+add %cx, %bx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 3005
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.50
+# CHECK-NEXT: IPC: 1.50
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx
+# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
+# CHECK-NEXT: 1 1 0.33 addw %cx, %bx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 0.50 2.00 - 0.50 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - imulw %ax, %bx
+# CHECK-NEXT: - - - 1.00 - - - - lzcntw %ax, %bx
+# CHECK-NEXT: - - 0.50 - - 0.50 - - addw %cx, %bx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER .. imulw %ax, %bx
+# CHECK-NEXT: [0,1] D=eeeER .. lzcntw %ax, %bx
+# CHECK-NEXT: [0,2] D====eER .. addw %cx, %bx
+# CHECK-NEXT: [1,0] D=====eeeER. imulw %ax, %bx
+# CHECK-NEXT: [1,1] .D=eeeE---R. lzcntw %ax, %bx
+# CHECK-NEXT: [1,2] .D====eE--R. addw %cx, %bx
+# CHECK-NEXT: [2,0] .D=====eeeER imulw %ax, %bx
+# CHECK-NEXT: [2,1] .D==eeeE---R lzcntw %ax, %bx
+# CHECK-NEXT: [2,2] . D====eE--R addw %cx, %bx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 4.3 0.3 0.0 imulw %ax, %bx
+# CHECK-NEXT: 1. 3 2.3 2.3 2.0 lzcntw %ax, %bx
+# CHECK-NEXT: 2. 3 5.0 0.0 1.3 addw %cx, %bx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,61 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 1.00 IPC for this code snippet.
+
+lzcnt %ax, %bx ## partial register stall.
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 1500
+# CHECK-NEXT: Total Cycles: 1505
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - lzcntw %ax, %bx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . lzcntw %ax, %bx
+# CHECK-NEXT: [1,0] D=eeeER. lzcntw %ax, %bx
+# CHECK-NEXT: [2,0] D==eeeER lzcntw %ax, %bx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 2.0 2.0 0.0 lzcntw %ax, %bx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+# perf stat reports a throughput of 0.60 IPC for this code snippet.
+# Each lzcnt has a false dependency on %ecx; the first lzcnt has to wait on the
+# imul. However, the folded load can start immediately.
+# The last lzcnt has a false dependency on %cx. However, even in this case, the
+# folded load can start immediately.
+
+imul %edx, %ecx
+lzcnt (%rsp), %cx
+lzcnt 2(%rsp), %cx
+
+# CHECK: Iterations: 1500
+# CHECK-NEXT: Instructions: 4500
+# CHECK-NEXT: Total Cycles: 4510
+# CHECK-NEXT: Total uOps: 7500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.66
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx
+# CHECK-NEXT: 2 8 1.00 * lzcntw (%rsp), %cx
+# CHECK-NEXT: 2 8 1.00 * lzcntw 2(%rsp), %cx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 3.00 - - - 2.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - imull %edx, %ecx
+# CHECK-NEXT: - - - 1.00 - - - 1.00 lzcntw (%rsp), %cx
+# CHECK-NEXT: - - - 1.00 - - - 1.00 lzcntw 2(%rsp), %cx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . imull %edx, %ecx
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . . lzcntw (%rsp), %cx
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [1,0] .D=========eeeER . imull %edx, %ecx
+# CHECK-NEXT: [1,1] . D=eeeeeeeeE--R . lzcntw (%rsp), %cx
+# CHECK-NEXT: [1,2] . D==eeeeeeeeE-R . lzcntw 2(%rsp), %cx
+# CHECK-NEXT: [2,0] . D==========eeeER imull %edx, %ecx
+# CHECK-NEXT: [2,1] . D==eeeeeeeeE---R lzcntw (%rsp), %cx
+# CHECK-NEXT: [2,2] . D==eeeeeeeeE--R lzcntw 2(%rsp), %cx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 7.3 0.3 0.0 imull %edx, %ecx
+# CHECK-NEXT: 1. 3 2.3 2.3 1.7 lzcntw (%rsp), %cx
+# CHECK-NEXT: 2. 3 2.7 2.7 1.0 lzcntw 2(%rsp), %cx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
+
+imul %ax, %cx
+add %al, %cl
+add %ecx, %ebx
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 8
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.38
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx
+# CHECK-NEXT: 1 1 0.33 addb %al, %cl
+# CHECK-NEXT: 1 1 0.33 addl %ecx, %ebx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeER . imulw %ax, %cx
+# CHECK-NEXT: [0,1] D===eER. addb %al, %cl
+# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,99 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s
+
+# VALU0/VALU1
+vpmulld %xmm0, %xmm1, %xmm2
+vpand %xmm0, %xmm1, %xmm2
+
+# VIMUL/STC
+vcvttps2dq %xmm0, %xmm2
+vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+
+# FPA/FPM
+vaddps %xmm0, %xmm1, %xmm2
+vsqrtps %xmm0, %xmm2
+
+# FPA/FPM YMM
+vaddps %ymm0, %ymm1, %ymm2
+vsqrtps %ymm0, %ymm2
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 4256
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: Block RThroughput: 42.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 1.00 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 1 0.33 vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 1 14 6.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 14 14.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 3 29 28.00 vsqrtps %ymm0, %ymm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - 42.00 6.03 3.96 - 17.01 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 1.00 - - - - - vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 0.05 0.06 - 0.89 - - vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: - - 1.98 0.90 - 15.12 - - vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - 14.00 1.00 - - - - - vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - 28.00 2.00 - - 1.00 - - vsqrtps %ymm0, %ymm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . .. vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,1] DeE----R . . . . . .. vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,2] DeeeE--R . . . . . .. vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: [0,3] D=eeeeeeeeeeeeeeER . . . .. vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,4] .DeeeE-----------R . . . .. vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [0,5] .DeeeeeeeeeeeeeeER . . . .. vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: [0,6] .D=eeeE----------R . . . .. vaddps %ymm0, %ymm1, %ymm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 1.0 1.0 79.0 vpmulld %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1. 2 1.0 1.0 82.5 vpand %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2. 2 1.5 1.5 80.0 vcvttps2dq %xmm0, %xmm2
+# CHECK-NEXT: 3. 2 1.5 1.5 74.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4. 2 2.0 2.0 84.0 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 5. 2 9.5 9.5 65.0 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 6. 2 2.5 2.5 83.0 vaddps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 7. 2 147.5 147.5 0.0 vsqrtps %ymm0, %ymm2
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pr37790.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pr37790.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pr37790.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/pr37790.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,43 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -lqueue=2 -iterations=2 -resource-pressure=false -timeline -timeline-max-cycles=104 < %s | FileCheck %s
+
+int3
+stmxcsr (%rsp)
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 4
+# CHECK-NEXT: Total Cycles: 213
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.05
+# CHECK-NEXT: IPC: 0.02
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 100 0.33 * * U int3
+# CHECK-NEXT: 4 5 1.00 * * U stmxcsr (%rsp)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 3.0 0.5 0.0 int3
+# CHECK-NEXT: 1. 2 100.0 0.0 0.0 stmxcsr (%rsp)
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rank.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rank.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rank.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rank.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,109 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+add %eax, %ecx
+add %eax, %edx
+add %eax, %ebx
+add %edx, %esi
+add %ebx, %eax
+add %edx, %esi
+add %ebx, %eax
+add %ebx, %eax
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 1.99
+# CHECK-NEXT: Block RThroughput: 2.7
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addl %eax, %ecx
+# CHECK-NEXT: 1 1 0.33 addl %eax, %edx
+# CHECK-NEXT: 1 1 0.33 addl %eax, %ebx
+# CHECK-NEXT: 1 1 0.33 addl %edx, %esi
+# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
+# CHECK-NEXT: 1 1 0.33 addl %edx, %esi
+# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
+# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 2.66 2.67 - 2.67 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %eax, %ecx
+# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %eax, %edx
+# CHECK-NEXT: - - 0.34 0.33 - 0.33 - - addl %eax, %ebx
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %esi
+# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %ebx, %eax
+# CHECK-NEXT: - - 0.34 0.33 - 0.33 - - addl %edx, %esi
+# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %ebx, %eax
+# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %ebx, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . addl %eax, %ecx
+# CHECK-NEXT: [0,1] DeER . . . addl %eax, %edx
+# CHECK-NEXT: [0,2] DeER . . . addl %eax, %ebx
+# CHECK-NEXT: [0,3] D=eER. . . addl %edx, %esi
+# CHECK-NEXT: [0,4] .DeER. . . addl %ebx, %eax
+# CHECK-NEXT: [0,5] .D=eER . . addl %edx, %esi
+# CHECK-NEXT: [0,6] .D=eER . . addl %ebx, %eax
+# CHECK-NEXT: [0,7] .D==eER . . addl %ebx, %eax
+# CHECK-NEXT: [1,0] . D==eER . . addl %eax, %ecx
+# CHECK-NEXT: [1,1] . D==eER . . addl %eax, %edx
+# CHECK-NEXT: [1,2] . D==eER . . addl %eax, %ebx
+# CHECK-NEXT: [1,3] . D===eER . . addl %edx, %esi
+# CHECK-NEXT: [1,4] . D==eER . . addl %ebx, %eax
+# CHECK-NEXT: [1,5] . D===eER. . addl %edx, %esi
+# CHECK-NEXT: [1,6] . D===eER. . addl %ebx, %eax
+# CHECK-NEXT: [1,7] . D====eER . addl %ebx, %eax
+# CHECK-NEXT: [2,0] . D====eER . addl %eax, %ecx
+# CHECK-NEXT: [2,1] . D====eER . addl %eax, %edx
+# CHECK-NEXT: [2,2] . D====eER . addl %eax, %ebx
+# CHECK-NEXT: [2,3] . D=====eER . addl %edx, %esi
+# CHECK-NEXT: [2,4] . D====eER . addl %ebx, %eax
+# CHECK-NEXT: [2,5] . D=====eER. addl %edx, %esi
+# CHECK-NEXT: [2,6] . D=====eER. addl %ebx, %eax
+# CHECK-NEXT: [2,7] . D======eER addl %ebx, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.0 0.3 0.0 addl %eax, %ecx
+# CHECK-NEXT: 1. 3 3.0 0.3 0.0 addl %eax, %edx
+# CHECK-NEXT: 2. 3 3.0 0.3 0.0 addl %eax, %ebx
+# CHECK-NEXT: 3. 3 4.0 0.0 0.0 addl %edx, %esi
+# CHECK-NEXT: 4. 3 3.0 0.0 0.0 addl %ebx, %eax
+# CHECK-NEXT: 5. 3 4.0 0.0 0.0 addl %edx, %esi
+# CHECK-NEXT: 6. 3 4.0 0.0 0.0 addl %ebx, %eax
+# CHECK-NEXT: 7. 3 5.0 0.0 0.0 addl %ebx, %eax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,61 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -retire-stats -iterations=1 < %s | FileCheck %s
+
+ vsqrtps %xmm0, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+ vaddps %xmm0, %xmm1, %xmm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 16
+# CHECK-NEXT: Total Cycles: 20
+# CHECK-NEXT: Total uOps: 16
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.80
+# CHECK-NEXT: Block RThroughput: 15.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 14 14.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
+
+# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT: 0, 16 (80.0%)
+# CHECK-NEXT: 1, 3 (15.0%)
+# CHECK-NEXT: 13, 1 (5.0%)
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,48 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
+
+# The vmul can start executing 3cy in advance. That is because the first use
+# operand (i.e. %xmm1) is a ReadAfterLd. That means, the memory operand is
+# evaluated before %xmm1.
+
+vaddps %xmm0, %xmm0, %xmm1
+vmulps (%rdi), %xmm1, %xmm2
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 14
+# CHECK-NEXT: Total uOps: 3
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 2 11 1.00 * vmulps (%rdi), %xmm1, %xmm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: [0,1] DeeeeeeeeeeeER vmulps (%rdi), %xmm1, %xmm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=0 -timeline < %s | FileCheck %s
+
+ imull %esi
+ imull (%rdi)
+
+# The second integer multiply can start at cycle 2 because the implicit reads
+# can start after the load operand is evaluated.
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 13
+# CHECK-NEXT: Total uOps: 7
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.54
+# CHECK-NEXT: IPC: 0.15
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 4 1.00 imull %esi
+# CHECK-NEXT: 4 9 1.00 * imull (%rdi)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . imull %esi
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER imull (%rdi)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi)
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=0 -timeline -dispatch=3 < %s | FileCheck %s
+
+ add %rdi, %rsi
+ add (%rsp), %rsi
+ add %rdx, %r8
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Total uOps: 4
+
+# CHECK: Dispatch Width: 3
+# CHECK-NEXT: uOps Per Cycle: 0.44
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 addq %rdi, %rsi
+# CHECK-NEXT: 2 6 0.50 * addq (%rsp), %rsi
+# CHECK-NEXT: 1 1 0.33 addq %rdx, %r8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . addq %rdi, %rsi
+# CHECK-NEXT: [0,1] DeeeeeeER addq (%rsp), %rsi
+# CHECK-NEXT: [0,2] .DeE----R addq %rdx, %r8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 addq %rdi, %rsi
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 addq (%rsp), %rsi
+# CHECK-NEXT: 2. 1 1.0 1.0 4.0 addq %rdx, %r8
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+# The register move from XMM0 to XMM1 can be eliminated at register renaming
+# stage. So, it should not consume pipeline resources.
+
+vxorps %xmm0, %xmm0, %xmm0
+vmovaps %xmm0, %xmm1
+vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 9
+# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Total uOps: 9
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1 1 1.00 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 1 3 1.00 vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 9
+# CHECK-NEXT: Max number of mappings used: 8
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - - 1.00 - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm0, %xmm1
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DR . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] DeER . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [0,2] D=eeeER . vaddps %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [1,0] D-----R . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] .DeE--R . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [1,2] .D=eeeER. vaddps %xmm1, %xmm1, %xmm2
+# CHECK-NEXT: [2,0] .D-----R. vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] .D=eE--R. vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [2,2] . D=eeeER vaddps %xmm1, %xmm1, %xmm2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 0.0 0.0 3.3 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 3 1.3 1.3 1.3 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 2. 3 2.0 0.0 0.0 vaddps %xmm1, %xmm1, %xmm2
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,121 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+pxor %mm0, %mm0
+movq %mm0, %mm1
+
+xorps %xmm0, %xmm0
+movaps %xmm0, %xmm1
+movups %xmm1, %xmm2
+movapd %xmm2, %xmm3
+movupd %xmm3, %xmm4
+movdqa %xmm4, %xmm5
+movdqu %xmm5, %xmm0
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 27
+# CHECK-NEXT: Total Cycles: 22
+# CHECK-NEXT: Total uOps: 27
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.23
+# CHECK-NEXT: IPC: 1.23
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.33 pxor %mm0, %mm0
+# CHECK-NEXT: 1 1 0.50 movq %mm0, %mm1
+# CHECK-NEXT: 1 0 0.25 xorps %xmm0, %xmm0
+# CHECK-NEXT: 1 1 1.00 movaps %xmm0, %xmm1
+# CHECK-NEXT: 1 1 1.00 movups %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 movapd %xmm2, %xmm3
+# CHECK-NEXT: 1 1 1.00 movupd %xmm3, %xmm4
+# CHECK-NEXT: 1 1 0.33 movdqa %xmm4, %xmm5
+# CHECK-NEXT: 1 1 0.33 movdqu %xmm5, %xmm0
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 27
+# CHECK-NEXT: Max number of mappings used: 21
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.67 1.67 - 4.67 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 0.67 - 0.33 - - pxor %mm0, %mm0
+# CHECK-NEXT: - - 1.00 - - - - - movq %mm0, %mm1
+# CHECK-NEXT: - - - - - - - - xorps %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - movaps %xmm0, %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - movups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - movapd %xmm2, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - movupd %xmm3, %xmm4
+# CHECK-NEXT: - - - 1.00 - - - - movdqa %xmm4, %xmm5
+# CHECK-NEXT: - - 0.67 - - 0.33 - - movdqu %xmm5, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeER . . . .. pxor %mm0, %mm0
+# CHECK-NEXT: [0,1] D=eER. . . .. movq %mm0, %mm1
+# CHECK-NEXT: [0,2] D---R. . . .. xorps %xmm0, %xmm0
+# CHECK-NEXT: [0,3] D=eER. . . .. movaps %xmm0, %xmm1
+# CHECK-NEXT: [0,4] .D=eER . . .. movups %xmm1, %xmm2
+# CHECK-NEXT: [0,5] .D==eER . . .. movapd %xmm2, %xmm3
+# CHECK-NEXT: [0,6] .D===eER . . .. movupd %xmm3, %xmm4
+# CHECK-NEXT: [0,7] .D====eER . . .. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,8] . D====eER. . .. movdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] . DeE----R. . .. pxor %mm0, %mm0
+# CHECK-NEXT: [1,1] . D=eE---R. . .. movq %mm0, %mm1
+# CHECK-NEXT: [1,2] . D=====ER. . .. xorps %xmm0, %xmm0
+# CHECK-NEXT: [1,3] . D====eER . .. movaps %xmm0, %xmm1
+# CHECK-NEXT: [1,4] . D=====eER . .. movups %xmm1, %xmm2
+# CHECK-NEXT: [1,5] . D======eER . .. movapd %xmm2, %xmm3
+# CHECK-NEXT: [1,6] . D=======eER . .. movupd %xmm3, %xmm4
+# CHECK-NEXT: [1,7] . D=======eER. .. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,8] . D========eER .. movdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . DeE--------R .. pxor %mm0, %mm0
+# CHECK-NEXT: [2,1] . D=eE-------R .. movq %mm0, %mm1
+# CHECK-NEXT: [2,2] . D========ER .. xorps %xmm0, %xmm0
+# CHECK-NEXT: [2,3] . D========eER .. movaps %xmm0, %xmm1
+# CHECK-NEXT: [2,4] . D=========eER .. movups %xmm1, %xmm2
+# CHECK-NEXT: [2,5] . D==========eER .. movapd %xmm2, %xmm3
+# CHECK-NEXT: [2,6] . .D==========eER.. movupd %xmm3, %xmm4
+# CHECK-NEXT: [2,7] . .D===========eER. movdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,8] . .D============eER movdqu %xmm5, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 1.0 1.0 4.0 pxor %mm0, %mm0
+# CHECK-NEXT: 1. 3 2.0 0.0 3.3 movq %mm0, %mm1
+# CHECK-NEXT: 2. 3 5.0 0.0 1.0 xorps %xmm0, %xmm0
+# CHECK-NEXT: 3. 3 5.3 0.7 0.0 movaps %xmm0, %xmm1
+# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movups %xmm1, %xmm2
+# CHECK-NEXT: 5. 3 7.0 0.0 0.0 movapd %xmm2, %xmm3
+# CHECK-NEXT: 6. 3 7.7 0.0 0.0 movupd %xmm3, %xmm4
+# CHECK-NEXT: 7. 3 8.3 0.0 0.0 movdqa %xmm4, %xmm5
+# CHECK-NEXT: 8. 3 9.0 0.0 0.0 movdqu %xmm5, %xmm0
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+vxorps %xmm0, %xmm0, %xmm0
+vmovaps %xmm0, %xmm1
+vmovups %xmm1, %xmm2
+vmovapd %xmm2, %xmm3
+vmovupd %xmm3, %xmm4
+vmovdqa %xmm4, %xmm5
+vmovdqu %xmm5, %xmm0
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 21
+# CHECK-NEXT: Total Cycles: 21
+# CHECK-NEXT: Total uOps: 21
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1 1 1.00 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 1 1 1.00 vmovups %xmm1, %xmm2
+# CHECK-NEXT: 1 1 1.00 vmovapd %xmm2, %xmm3
+# CHECK-NEXT: 1 1 1.00 vmovupd %xmm3, %xmm4
+# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: 1 1 0.33 vmovdqu %xmm5, %xmm0
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 21
+# CHECK-NEXT: Max number of mappings used: 17
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - 4.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm0, %xmm1
+# CHECK-NEXT: - - - - - 1.00 - - vmovups %xmm1, %xmm2
+# CHECK-NEXT: - - - - - 1.00 - - vmovapd %xmm2, %xmm3
+# CHECK-NEXT: - - - - - 1.00 - - vmovupd %xmm3, %xmm4
+# CHECK-NEXT: - - - 1.00 - - - - vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: - - 1.00 - - - - - vmovdqu %xmm5, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DR . . . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] DeER . . . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [0,2] D=eER. . . . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [0,3] D==eER . . . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [0,4] .D==eER . . . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [0,5] .D===eER . . . vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [0,6] .D====eER . . . vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [1,0] .D=====ER . . . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] . D====eER. . . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [1,2] . D=====eER . . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [1,3] . D======eER . . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [1,4] . D=======eER . . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [1,5] . D=======eER . . vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [1,6] . D========eER. . vmovdqu %xmm5, %xmm0
+# CHECK-NEXT: [2,0] . D=========ER. . vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] . D=========eER . vmovaps %xmm0, %xmm1
+# CHECK-NEXT: [2,2] . D=========eER . vmovups %xmm1, %xmm2
+# CHECK-NEXT: [2,3] . D==========eER . vmovapd %xmm2, %xmm3
+# CHECK-NEXT: [2,4] . D===========eER . vmovupd %xmm3, %xmm4
+# CHECK-NEXT: [2,5] . D============eER. vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: [2,6] . D============eER vmovdqu %xmm5, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 5.3 0.0 0.0 vxorps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 3 5.3 0.3 0.0 vmovaps %xmm0, %xmm1
+# CHECK-NEXT: 2. 3 6.0 0.0 0.0 vmovups %xmm1, %xmm2
+# CHECK-NEXT: 3. 3 7.0 0.0 0.0 vmovapd %xmm2, %xmm3
+# CHECK-NEXT: 4. 3 7.7 0.0 0.0 vmovupd %xmm3, %xmm4
+# CHECK-NEXT: 5. 3 8.3 0.0 0.0 vmovdqa %xmm4, %xmm5
+# CHECK-NEXT: 6. 3 9.0 0.0 0.0 vmovdqu %xmm5, %xmm0
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+xor %eax, %eax
+mov %eax, %ebx
+mov %ebx, %ecx
+mov %ecx, %edx
+mov %edx, %eax
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 15
+# CHECK-NEXT: Total Cycles: 15
+# CHECK-NEXT: Total uOps: 15
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 xorl %eax, %eax
+# CHECK-NEXT: 1 1 0.33 movl %eax, %ebx
+# CHECK-NEXT: 1 1 0.33 movl %ebx, %ecx
+# CHECK-NEXT: 1 1 0.33 movl %ecx, %edx
+# CHECK-NEXT: 1 1 0.33 movl %edx, %eax
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 18
+# CHECK-NEXT: Max number of mappings used: 15
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - xorl %eax, %eax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %eax, %ebx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %ebx, %ecx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %ecx, %edx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %edx, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DR . . . xorl %eax, %eax
+# CHECK-NEXT: [0,1] DeER . . . movl %eax, %ebx
+# CHECK-NEXT: [0,2] D=eER. . . movl %ebx, %ecx
+# CHECK-NEXT: [0,3] D==eER . . movl %ecx, %edx
+# CHECK-NEXT: [0,4] .D==eER . . movl %edx, %eax
+# CHECK-NEXT: [1,0] .D===ER . . xorl %eax, %eax
+# CHECK-NEXT: [1,1] .D===eER . . movl %eax, %ebx
+# CHECK-NEXT: [1,2] .D====eER . . movl %ebx, %ecx
+# CHECK-NEXT: [1,3] . D====eER. . movl %ecx, %edx
+# CHECK-NEXT: [1,4] . D=====eER . movl %edx, %eax
+# CHECK-NEXT: [2,0] . D======ER . xorl %eax, %eax
+# CHECK-NEXT: [2,1] . D======eER . movl %eax, %ebx
+# CHECK-NEXT: [2,2] . D======eER . movl %ebx, %ecx
+# CHECK-NEXT: [2,3] . D=======eER. movl %ecx, %edx
+# CHECK-NEXT: [2,4] . D========eER movl %edx, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.7 0.0 0.0 xorl %eax, %eax
+# CHECK-NEXT: 1. 3 4.0 0.3 0.0 movl %eax, %ebx
+# CHECK-NEXT: 2. 3 4.7 0.0 0.0 movl %ebx, %ecx
+# CHECK-NEXT: 3. 3 5.3 0.0 0.0 movl %ecx, %edx
+# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movl %edx, %eax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
+
+xor %rax, %rax
+mov %rax, %rbx
+mov %rbx, %rcx
+mov %rcx, %rdx
+mov %rdx, %rax
+
+# CHECK: Iterations: 3
+# CHECK-NEXT: Instructions: 15
+# CHECK-NEXT: Total Cycles: 15
+# CHECK-NEXT: Total uOps: 15
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 1.00
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.25 xorq %rax, %rax
+# CHECK-NEXT: 1 1 0.33 movq %rax, %rbx
+# CHECK-NEXT: 1 1 0.33 movq %rbx, %rcx
+# CHECK-NEXT: 1 1 0.33 movq %rcx, %rdx
+# CHECK-NEXT: 1 1 0.33 movq %rdx, %rax
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 18
+# CHECK-NEXT: Max number of mappings used: 15
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - - - - - - xorq %rax, %rax
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rax, %rbx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rbx, %rcx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rcx, %rdx
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rdx, %rax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DR . . . xorq %rax, %rax
+# CHECK-NEXT: [0,1] DeER . . . movq %rax, %rbx
+# CHECK-NEXT: [0,2] D=eER. . . movq %rbx, %rcx
+# CHECK-NEXT: [0,3] D==eER . . movq %rcx, %rdx
+# CHECK-NEXT: [0,4] .D==eER . . movq %rdx, %rax
+# CHECK-NEXT: [1,0] .D===ER . . xorq %rax, %rax
+# CHECK-NEXT: [1,1] .D===eER . . movq %rax, %rbx
+# CHECK-NEXT: [1,2] .D====eER . . movq %rbx, %rcx
+# CHECK-NEXT: [1,3] . D====eER. . movq %rcx, %rdx
+# CHECK-NEXT: [1,4] . D=====eER . movq %rdx, %rax
+# CHECK-NEXT: [2,0] . D======ER . xorq %rax, %rax
+# CHECK-NEXT: [2,1] . D======eER . movq %rax, %rbx
+# CHECK-NEXT: [2,2] . D======eER . movq %rbx, %rcx
+# CHECK-NEXT: [2,3] . D=======eER. movq %rcx, %rdx
+# CHECK-NEXT: [2,4] . D========eER movq %rdx, %rax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 3.7 0.0 0.0 xorq %rax, %rax
+# CHECK-NEXT: 1. 3 4.0 0.3 0.0 movq %rax, %rbx
+# CHECK-NEXT: 2. 3 4.7 0.0 0.0 movq %rbx, %rcx
+# CHECK-NEXT: 3. 3 5.3 0.0 0.0 movq %rcx, %rdx
+# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movq %rdx, %rax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-1.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-1.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-1.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,77 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+vaddps %xmm0, %xmm0, %xmm0
+vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 43
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 40 (93.0%)
+# CHECK-NEXT: 2, 1 (2.3%)
+# CHECK-NEXT: 4, 2 (4.7%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 10
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,0] D========eeeER . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] D===========eeeeeER . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,0] .D===============eeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] .D==================eeeeeER . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,0] .D=======================eeeER. . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,1] .D==========================eeeeeER. . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,0] . D==============================eeeER . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,1] . D=================================eeeeeER vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 5 16.2 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 5 19.2 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-2.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-2.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-2.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,77 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -register-file-size=5 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+vaddps %xmm0, %xmm0, %xmm0
+vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 43
+# CHECK-NEXT: Total uOps: 10
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.23
+# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 20 (46.5%)
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 36 (83.7%)
+# CHECK-NEXT: 1, 6 (14.0%)
+# CHECK-NEXT: 4, 1 (2.3%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 10
+# CHECK-NEXT: Max number of mappings used: 5
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 1.00 1.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,0] D========eeeER . . . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [1,1] D===========eeeeeER . . . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,0] .D===============eeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [2,1] . D==============eeeeeER . . . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,0] . . D==============eeeER. . . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [3,1] . . . D==============eeeeeER. . . vmulps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,0] . . . . D==============eeeER . . vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: [4,1] . . . . .D==============eeeeeER vmulps %xmm0, %xmm0, %xmm0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 5 11.2 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
+# CHECK-NEXT: 1. 5 12.2 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-3.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-3.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-3.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,76 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -register-file-size=5 -iterations=2 -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+idiv %eax
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 55
+# CHECK-NEXT: Total uOps: 2
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.04
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 25 10.00 U idivl %eax
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 27 (49.1%)
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 53 (96.4%)
+# CHECK-NEXT: 1, 2 (3.6%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 6
+# CHECK-NEXT: Max number of mappings used: 3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: 10.00 - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: 10.00 - 1.00 - - - - - idivl %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
+# CHECK-NEXT: [1,0] . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 1.0 1.0 0.0 idivl %eax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-4.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-4.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-4.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-4.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,60 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=22 -dispatch-stats -register-file-stats -resource-pressure=false -timeline -timeline-max-iterations=3 < %s | FileCheck %s
+
+idiv %eax
+
+# CHECK: Iterations: 22
+# CHECK-NEXT: Instructions: 22
+# CHECK-NEXT: Total Cycles: 553
+# CHECK-NEXT: Total uOps: 22
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.04
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 25 10.00 U idivl %eax
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 547 (98.9%)
+# CHECK-NEXT: 2, 1 (0.2%)
+# CHECK-NEXT: 4, 5 (0.9%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 66
+# CHECK-NEXT: Max number of mappings used: 66
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax
+# CHECK-NEXT: [1,0] D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
+# CHECK-NEXT: [2,0] D==================================================eeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 3 26.0 0.3 0.0 idivl %eax
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-5.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-5.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-5.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/register-files-5.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,143 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
+
+ vdivps %ymm0, %ymm0, %ymm1
+ vaddps %ymm0, %ymm0, %ymm2
+ vaddps %ymm0, %ymm0, %ymm3
+ vaddps %ymm0, %ymm0, %ymm4
+ vaddps %ymm0, %ymm0, %ymm5
+ vaddps %ymm0, %ymm0, %ymm6
+ vaddps %ymm0, %ymm0, %ymm7
+ vaddps %ymm0, %ymm0, %ymm8
+ vaddps %ymm0, %ymm0, %ymm9
+ vaddps %ymm0, %ymm0, %ymm10
+ vaddps %ymm0, %ymm0, %ymm11
+ vaddps %ymm0, %ymm0, %ymm12
+ vaddps %ymm0, %ymm0, %ymm13
+ vaddps %ymm0, %ymm0, %ymm14
+ vaddps %ymm0, %ymm0, %ymm15
+ vaddps %ymm2, %ymm0, %ymm0
+ vaddps %ymm2, %ymm0, %ymm3
+ vaddps %ymm2, %ymm0, %ymm4
+ vaddps %ymm2, %ymm0, %ymm5
+ vaddps %ymm2, %ymm0, %ymm6
+ vaddps %ymm2, %ymm0, %ymm7
+ vaddps %ymm2, %ymm0, %ymm8
+ vaddps %ymm2, %ymm0, %ymm9
+ vaddps %ymm2, %ymm0, %ymm10
+ vaddps %ymm2, %ymm0, %ymm11
+ vaddps %ymm2, %ymm0, %ymm12
+ vaddps %ymm2, %ymm0, %ymm13
+ vaddps %ymm2, %ymm0, %ymm14
+ vaddps %ymm2, %ymm0, %ymm15
+ vaddps %ymm3, %ymm0, %ymm2
+ vaddps %ymm3, %ymm0, %ymm4
+ vaddps %ymm3, %ymm0, %ymm5
+ vaddps %ymm3, %ymm0, %ymm6
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 33
+# CHECK-NEXT: Total Cycles: 37
+# CHECK-NEXT: Total uOps: 35
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.95
+# CHECK-NEXT: IPC: 0.89
+# CHECK-NEXT: Block RThroughput: 32.0
+
+# CHECK: Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT - Register unavailable: 0
+# CHECK-NEXT: RCU - Retire tokens unavailable: 0
+# CHECK-NEXT: SCHEDQ - Scheduler full: 0
+# CHECK-NEXT: LQ - Load queue full: 0
+# CHECK-NEXT: SQ - Store queue full: 0
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
+
+# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT: 0, 28 (75.7%)
+# CHECK-NEXT: 3, 1 (2.7%)
+# CHECK-NEXT: 4, 8 (21.6%)
+
+# CHECK: Register File statistics:
+# CHECK-NEXT: Total number of mappings created: 33
+# CHECK-NEXT: Max number of mappings used: 33
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER .. vdivps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: [0,1] DeeeE--------------------------R .. vaddps %ymm0, %ymm0, %ymm2
+# CHECK-NEXT: [0,2] .DeeeE-------------------------R .. vaddps %ymm0, %ymm0, %ymm3
+# CHECK-NEXT: [0,3] .D=eeeE------------------------R .. vaddps %ymm0, %ymm0, %ymm4
+# CHECK-NEXT: [0,4] .D==eeeE-----------------------R .. vaddps %ymm0, %ymm0, %ymm5
+# CHECK-NEXT: [0,5] .D===eeeE----------------------R .. vaddps %ymm0, %ymm0, %ymm6
+# CHECK-NEXT: [0,6] . D===eeeE---------------------R .. vaddps %ymm0, %ymm0, %ymm7
+# CHECK-NEXT: [0,7] . D=====eeeE-------------------R .. vaddps %ymm0, %ymm0, %ymm8
+# CHECK-NEXT: [0,8] . D======eeeE------------------R .. vaddps %ymm0, %ymm0, %ymm9
+# CHECK-NEXT: [0,9] . D=======eeeE-----------------R .. vaddps %ymm0, %ymm0, %ymm10
+# CHECK-NEXT: [0,10] . D=======eeeE----------------R .. vaddps %ymm0, %ymm0, %ymm11
+# CHECK-NEXT: [0,11] . D========eeeE---------------R .. vaddps %ymm0, %ymm0, %ymm12
+# CHECK-NEXT: [0,12] . D=========eeeE--------------R .. vaddps %ymm0, %ymm0, %ymm13
+# CHECK-NEXT: [0,13] . D===========eeeE------------R .. vaddps %ymm0, %ymm0, %ymm14
+# CHECK-NEXT: [0,14] . D===========eeeE-----------R .. vaddps %ymm0, %ymm0, %ymm15
+# CHECK-NEXT: [0,15] . D==eeeE--------------------R .. vaddps %ymm2, %ymm0, %ymm0
+# CHECK-NEXT: [0,16] . D=========eeeE-------------R .. vaddps %ymm2, %ymm0, %ymm3
+# CHECK-NEXT: [0,17] . D============eeeE----------R .. vaddps %ymm2, %ymm0, %ymm4
+# CHECK-NEXT: [0,18] . D============eeeE---------R .. vaddps %ymm2, %ymm0, %ymm5
+# CHECK-NEXT: [0,19] . D=============eeeE--------R .. vaddps %ymm2, %ymm0, %ymm6
+# CHECK-NEXT: [0,20] . D==============eeeE-------R .. vaddps %ymm2, %ymm0, %ymm7
+# CHECK-NEXT: [0,21] . D===============eeeE------R .. vaddps %ymm2, %ymm0, %ymm8
+# CHECK-NEXT: [0,22] . .D===============eeeE-----R .. vaddps %ymm2, %ymm0, %ymm9
+# CHECK-NEXT: [0,23] . .D================eeeE----R .. vaddps %ymm2, %ymm0, %ymm10
+# CHECK-NEXT: [0,24] . .D=================eeeE---R .. vaddps %ymm2, %ymm0, %ymm11
+# CHECK-NEXT: [0,25] . .D==================eeeE--R .. vaddps %ymm2, %ymm0, %ymm12
+# CHECK-NEXT: [0,26] . . D==================eeeE-R .. vaddps %ymm2, %ymm0, %ymm13
+# CHECK-NEXT: [0,27] . . D===================eeeER .. vaddps %ymm2, %ymm0, %ymm14
+# CHECK-NEXT: [0,28] . . D====================eeeER .. vaddps %ymm2, %ymm0, %ymm15
+# CHECK-NEXT: [0,29] . . D=====================eeeER .. vaddps %ymm3, %ymm0, %ymm2
+# CHECK-NEXT: [0,30] . . D=====================eeeER.. vaddps %ymm3, %ymm0, %ymm4
+# CHECK-NEXT: [0,31] . . D======================eeeER. vaddps %ymm3, %ymm0, %ymm5
+# CHECK-NEXT: [0,32] . . D=======================eeeER vaddps %ymm3, %ymm0, %ymm6
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vdivps %ymm0, %ymm0, %ymm1
+# CHECK-NEXT: 1. 1 1.0 1.0 26.0 vaddps %ymm0, %ymm0, %ymm2
+# CHECK-NEXT: 2. 1 1.0 1.0 25.0 vaddps %ymm0, %ymm0, %ymm3
+# CHECK-NEXT: 3. 1 2.0 2.0 24.0 vaddps %ymm0, %ymm0, %ymm4
+# CHECK-NEXT: 4. 1 3.0 3.0 23.0 vaddps %ymm0, %ymm0, %ymm5
+# CHECK-NEXT: 5. 1 4.0 4.0 22.0 vaddps %ymm0, %ymm0, %ymm6
+# CHECK-NEXT: 6. 1 4.0 4.0 21.0 vaddps %ymm0, %ymm0, %ymm7
+# CHECK-NEXT: 7. 1 6.0 6.0 19.0 vaddps %ymm0, %ymm0, %ymm8
+# CHECK-NEXT: 8. 1 7.0 7.0 18.0 vaddps %ymm0, %ymm0, %ymm9
+# CHECK-NEXT: 9. 1 8.0 8.0 17.0 vaddps %ymm0, %ymm0, %ymm10
+# CHECK-NEXT: 10. 1 8.0 8.0 16.0 vaddps %ymm0, %ymm0, %ymm11
+# CHECK-NEXT: 11. 1 9.0 9.0 15.0 vaddps %ymm0, %ymm0, %ymm12
+# CHECK-NEXT: 12. 1 10.0 10.0 14.0 vaddps %ymm0, %ymm0, %ymm13
+# CHECK-NEXT: 13. 1 12.0 12.0 12.0 vaddps %ymm0, %ymm0, %ymm14
+# CHECK-NEXT: 14. 1 12.0 12.0 11.0 vaddps %ymm0, %ymm0, %ymm15
+# CHECK-NEXT: 15. 1 3.0 3.0 20.0 vaddps %ymm2, %ymm0, %ymm0
+# CHECK-NEXT: 16. 1 10.0 4.0 13.0 vaddps %ymm2, %ymm0, %ymm3
+# CHECK-NEXT: 17. 1 13.0 7.0 10.0 vaddps %ymm2, %ymm0, %ymm4
+# CHECK-NEXT: 18. 1 13.0 8.0 9.0 vaddps %ymm2, %ymm0, %ymm5
+# CHECK-NEXT: 19. 1 14.0 9.0 8.0 vaddps %ymm2, %ymm0, %ymm6
+# CHECK-NEXT: 20. 1 15.0 10.0 7.0 vaddps %ymm2, %ymm0, %ymm7
+# CHECK-NEXT: 21. 1 16.0 11.0 6.0 vaddps %ymm2, %ymm0, %ymm8
+# CHECK-NEXT: 22. 1 16.0 12.0 5.0 vaddps %ymm2, %ymm0, %ymm9
+# CHECK-NEXT: 23. 1 17.0 13.0 4.0 vaddps %ymm2, %ymm0, %ymm10
+# CHECK-NEXT: 24. 1 18.0 14.0 3.0 vaddps %ymm2, %ymm0, %ymm11
+# CHECK-NEXT: 25. 1 19.0 15.0 2.0 vaddps %ymm2, %ymm0, %ymm12
+# CHECK-NEXT: 26. 1 19.0 16.0 1.0 vaddps %ymm2, %ymm0, %ymm13
+# CHECK-NEXT: 27. 1 20.0 17.0 0.0 vaddps %ymm2, %ymm0, %ymm14
+# CHECK-NEXT: 28. 1 21.0 18.0 0.0 vaddps %ymm2, %ymm0, %ymm15
+# CHECK-NEXT: 29. 1 22.0 12.0 0.0 vaddps %ymm3, %ymm0, %ymm2
+# CHECK-NEXT: 30. 1 22.0 13.0 0.0 vaddps %ymm3, %ymm0, %ymm4
+# CHECK-NEXT: 31. 1 23.0 14.0 0.0 vaddps %ymm3, %ymm0, %ymm5
+# CHECK-NEXT: 32. 1 24.0 15.0 0.0 vaddps %ymm3, %ymm0, %ymm6
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-3dnow.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-3dnow.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-3dnow.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-3dnow.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,208 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+femms
+
+pavgusb %mm0, %mm2
+pavgusb (%rax), %mm2
+
+pf2id %mm0, %mm2
+pf2id (%rax), %mm2
+
+pf2iw %mm0, %mm2
+pf2iw (%rax), %mm2
+
+pfacc %mm0, %mm2
+pfacc (%rax), %mm2
+
+pfadd %mm0, %mm2
+pfadd (%rax), %mm2
+
+pfcmpeq %mm0, %mm2
+pfcmpeq (%rax), %mm2
+
+pfcmpge %mm0, %mm2
+pfcmpge (%rax), %mm2
+
+pfcmpgt %mm0, %mm2
+pfcmpgt (%rax), %mm2
+
+pfmax %mm0, %mm2
+pfmax (%rax), %mm2
+
+pfmin %mm0, %mm2
+pfmin (%rax), %mm2
+
+pfmul %mm0, %mm2
+pfmul (%rax), %mm2
+
+pfnacc %mm0, %mm2
+pfnacc (%rax), %mm2
+
+pfpnacc %mm0, %mm2
+pfpnacc (%rax), %mm2
+
+pfrcp %mm0, %mm2
+pfrcp (%rax), %mm2
+
+pfrcpit1 %mm0, %mm2
+pfrcpit1 (%rax), %mm2
+
+pfrcpit2 %mm0, %mm2
+pfrcpit2 (%rax), %mm2
+
+pfrsqit1 %mm0, %mm2
+pfrsqit1 (%rax), %mm2
+
+pfrsqrt %mm0, %mm2
+pfrsqrt (%rax), %mm2
+
+pfsub %mm0, %mm2
+pfsub (%rax), %mm2
+
+pfsubr %mm0, %mm2
+pfsubr (%rax), %mm2
+
+pi2fd %mm0, %mm2
+pi2fd (%rax), %mm2
+
+pi2fw %mm0, %mm2
+pi2fw (%rax), %mm2
+
+pmulhrw %mm0, %mm2
+pmulhrw (%rax), %mm2
+
+prefetch (%rax)
+prefetchw (%rax)
+
+pswapd %mm0, %mm2
+pswapd (%rax), %mm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 31 31 10.33 * * U femms
+# CHECK-NEXT: 1 3 1.00 pavgusb %mm0, %mm2
+# CHECK-NEXT: 2 8 1.00 * pavgusb (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pf2id %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pf2id (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pf2iw %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pf2iw (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfacc %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfacc (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfadd %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfadd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfcmpeq %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfcmpeq (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfcmpge %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfcmpge (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfcmpgt %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfcmpgt (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfmax %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfmax (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfmin %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfmin (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfmul %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfmul (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfnacc %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfnacc (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfpnacc %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfpnacc (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrcp %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrcp (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrcpit1 %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrcpit1 (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrcpit2 %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrcpit2 (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrsqit1 %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrsqit1 (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfrsqrt %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfrsqrt (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfsub %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfsub (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pfsubr %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pfsubr (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pi2fd %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pi2fd (%rax), %mm2
+# CHECK-NEXT: 1 3 1.00 pi2fw %mm0, %mm2
+# CHECK-NEXT: 2 9 1.00 * pi2fw (%rax), %mm2
+# CHECK-NEXT: 1 5 1.00 pmulhrw %mm0, %mm2
+# CHECK-NEXT: 2 10 1.00 * pmulhrw (%rax), %mm2
+# CHECK-NEXT: 1 5 0.50 * * prefetch (%rax)
+# CHECK-NEXT: 1 5 0.50 * * prefetchw (%rax)
+# CHECK-NEXT: 1 1 1.00 pswapd %mm0, %mm2
+# CHECK-NEXT: 2 6 1.00 * pswapd (%rax), %mm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 12.33 54.33 - 12.33 13.00 13.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 10.33 10.33 - 10.33 - - femms
+# CHECK-NEXT: - - - 1.00 - - - - pavgusb %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgusb (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pf2id %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pf2id (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pf2iw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pf2iw (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfacc %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfacc (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfadd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfadd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfcmpeq %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpeq (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfcmpge %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpge (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfcmpgt %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpgt (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfmax %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmax (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfmin %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmin (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfmul %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmul (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfnacc %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfnacc (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfpnacc %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfpnacc (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrcp %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcp (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrcpit1 %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcpit1 (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrcpit2 %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcpit2 (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrsqit1 %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrsqit1 (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfrsqrt %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrsqrt (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfsub %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfsub (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pfsubr %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfsubr (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pi2fd %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pi2fd (%rax), %mm2
+# CHECK-NEXT: - - - 1.00 - - - - pi2fw %mm0, %mm2
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pi2fw (%rax), %mm2
+# CHECK-NEXT: - - 1.00 - - - - - pmulhrw %mm0, %mm2
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhrw (%rax), %mm2
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetch (%rax)
+# CHECK-NEXT: - - - - - - 0.50 0.50 prefetchw (%rax)
+# CHECK-NEXT: - - - - - 1.00 - - pswapd %mm0, %mm2
+# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pswapd (%rax), %mm2
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-adx.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-adx.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-adx.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-adx.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+adcx %ebx, %ecx
+adcx (%rbx), %ecx
+adcx %rbx, %rcx
+adcx (%rbx), %rcx
+
+adox %ebx, %ecx
+adox (%rbx), %ecx
+adox %rbx, %rcx
+adox (%rbx), %rcx
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 2 0.67 adcxl %ebx, %ecx
+# CHECK-NEXT: 3 7 0.67 * adcxl (%rbx), %ecx
+# CHECK-NEXT: 2 2 0.67 adcxq %rbx, %rcx
+# CHECK-NEXT: 3 7 0.67 * adcxq (%rbx), %rcx
+# CHECK-NEXT: 2 2 0.67 adoxl %ebx, %ecx
+# CHECK-NEXT: 3 7 0.67 * adoxl (%rbx), %ecx
+# CHECK-NEXT: 2 2 0.67 adoxq %rbx, %rcx
+# CHECK-NEXT: 3 7 0.67 * adoxq (%rbx), %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 6.67 2.67 - 6.67 2.00 2.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcxl %ebx, %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcxl (%rbx), %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcxq %rbx, %rcx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcxq (%rbx), %rcx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adoxl %ebx, %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adoxl (%rbx), %ecx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adoxq %rbx, %rcx
+# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adoxq (%rbx), %rcx
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-aes.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-aes.s?rev=345462&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-aes.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/resources-aes.s Sat Oct 27 13:36:11 2018
@@ -0,0 +1,71 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+aesdec %xmm0, %xmm2
+aesdec (%rax), %xmm2
+
+aesdeclast %xmm0, %xmm2
+aesdeclast (%rax), %xmm2
+
+aesenc %xmm0, %xmm2
+aesenc (%rax), %xmm2
+
+aesenclast %xmm0, %xmm2
+aesenclast (%rax), %xmm2
+
+aesimc %xmm0, %xmm2
+aesimc (%rax), %xmm2
+
+aeskeygenassist $22, %xmm0, %xmm2
+aeskeygenassist $22, (%rax), %xmm2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 7 1.00 aesdec %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesdec (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 aesdeclast %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesdeclast (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 aesenc %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesenc (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 aesenclast %xmm0, %xmm2
+# CHECK-NEXT: 3 13 1.00 * aesenclast (%rax), %xmm2
+# CHECK-NEXT: 2 12 2.00 aesimc %xmm0, %xmm2
+# CHECK-NEXT: 3 18 2.00 * aesimc (%rax), %xmm2
+# CHECK-NEXT: 1 8 3.67 aeskeygenassist $22, %xmm0, %xmm2
+# CHECK-NEXT: 1 8 3.33 * aeskeygenassist $22, (%rax), %xmm2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 9.67 9.67 - 21.67 3.00 3.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesdec %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesdec (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesdeclast %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesdeclast (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesenc %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesenc (%rax), %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesenclast %xmm0, %xmm2
+# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesenclast (%rax), %xmm2
+# CHECK-NEXT: - - - - - 2.00 - - aesimc %xmm0, %xmm2
+# CHECK-NEXT: - - - - - 2.00 0.50 0.50 aesimc (%rax), %xmm2
+# CHECK-NEXT: - - 3.67 3.67 - 3.67 - - aeskeygenassist $22, %xmm0, %xmm2
+# CHECK-NEXT: - - 3.33 3.33 - 3.33 0.50 0.50 aeskeygenassist $22, (%rax), %xmm2
More information about the llvm-commits mailing list