[llvm] r353043 - [AsmPrinter] Remove hidden flag -print-schedule.
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 4 04:51:26 PST 2019
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=353043&r1=353042&r2=353043&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Mon Feb 4 04:51:26 2019
@@ -2,13 +2,13 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -26,59 +26,11 @@ define float @f32_no_estimate(float %x)
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: f32_no_estimate:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: f32_no_estimate:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: f32_no_estimate:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: f32_no_estimate:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: f32_no_estimate:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: f32_no_estimate:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: f32_no_estimate:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; HASWELL-NO-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
-;
-; KNL-LABEL: f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; KNL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [11:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: f32_no_estimate:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -114,37 +66,37 @@ define float @f32_one_step(float %x) #1
;
; BDVER2-LABEL: f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -156,19 +108,12 @@ define float @f32_one_step(float %x) #1
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: f32_one_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_one_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: f32_one_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -218,52 +163,52 @@ define float @f32_two_step(float %x) #2
;
; BDVER2-LABEL: f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -279,27 +224,16 @@ define float @f32_two_step(float %x) #2
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -326,27 +260,27 @@ define <4 x float> @v4f32_no_estimate(<4
;
; BDVER2-LABEL: v4f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
@@ -354,17 +288,11 @@ define <4 x float> @v4f32_no_estimate(<4
; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v4f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v4f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SKX-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [11:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v4f32_no_estimate:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -400,38 +328,38 @@ define <4 x float> @v4f32_one_step(<4 x
;
; BDVER2-LABEL: v4f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -445,18 +373,18 @@ define <4 x float> @v4f32_one_step(<4 x
;
; KNL-LABEL: v4f32_one_step:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %xmm0, %xmm1
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; KNL-NEXT: retq
;
; SKX-LABEL: v4f32_one_step:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %xmm0, %xmm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; SKX-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -506,52 +434,52 @@ define <4 x float> @v4f32_two_step(<4 x
;
; BDVER2-LABEL: v4f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -567,27 +495,16 @@ define <4 x float> @v4f32_two_step(<4 x
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v4f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v4f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v4f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %xmm0, %xmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -617,27 +534,27 @@ define <8 x float> @v8f32_no_estimate(<8
;
; BDVER2-LABEL: v8f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [9:19.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [38:38.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
@@ -645,17 +562,11 @@ define <8 x float> @v8f32_no_estimate(<8
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v8f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SKX-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [11:5.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v8f32_no_estimate:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -698,38 +609,38 @@ define <8 x float> @v8f32_one_step(<8 x
;
; BDVER2-LABEL: v8f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -743,18 +654,18 @@ define <8 x float> @v8f32_one_step(<8 x
;
; KNL-LABEL: v8f32_one_step:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %ymm0, %ymm1
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; KNL-NEXT: retq
;
; SKX-LABEL: v8f32_one_step:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %ymm0, %ymm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; SKX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -817,52 +728,52 @@ define <8 x float> @v8f32_two_step(<8 x
;
; BDVER2-LABEL: v8f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm1, %ymm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -878,27 +789,16 @@ define <8 x float> @v8f32_two_step(<8 x
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v8f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v8f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %ymm0, %ymm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %ymm1, %ymm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -936,31 +836,31 @@ define <16 x float> @v16f32_no_estimate(
;
; BDVER2-LABEL: v16f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [9:19.00]
-; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [9:19.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [38:38.00]
-; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [38:38.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
-; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [21:14.00]
-; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [21:14.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
@@ -969,17 +869,11 @@ define <16 x float> @v16f32_no_estimate(
; HASWELL-NO-FMA-NEXT: vdivps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v16f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
-; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [21:14.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
-; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v16f32_no_estimate:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
@@ -1045,55 +939,55 @@ define <16 x float> @v16f32_one_step(<16
;
; BDVER2-LABEL: v16f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm4
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vrcpps %ymm1, %ymm4
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vmulps %ymm4, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -1110,19 +1004,12 @@ define <16 x float> @v16f32_one_step(<16
; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v16f32_one_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_one_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v16f32_one_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
@@ -1226,81 +1113,81 @@ define <16 x float> @v16f32_two_step(<16
;
; BDVER2-LABEL: v16f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -1325,27 +1212,16 @@ define <16 x float> @v16f32_two_step(<16
; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v16f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
-; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
-; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v16f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %zmm1, %zmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=353043&r1=353042&r2=353043&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Mon Feb 4 04:51:26 2019
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
; It's the extra tests coverage for recip as discussed on D26855.
@@ -19,59 +19,11 @@ define float @f32_no_step_2(float %x) #3
; SSE-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: f32_no_step_2:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; AVX-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: f32_no_step_2:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: f32_no_step_2:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: f32_no_step_2:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: f32_no_step_2:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: f32_no_step_2:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: f32_no_step_2:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_no_step_2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_no_step_2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: f32_no_step_2:
+; AVX: # %bb.0:
+; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
%div = fdiv fast float 1234.0, %x
ret float %div
}
@@ -110,68 +62,60 @@ define float @f32_one_step_2(float %x) #
;
; BDVER2-LABEL: f32_one_step_2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step_2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step_2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step_2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_one_step_2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_one_step_2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: f32_one_step_2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast float 3456.0, %x
ret float %div
}
@@ -213,75 +157,66 @@ define float @f32_one_step_2_divs(float
;
; BDVER2-LABEL: f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_one_step_2_divs:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; KNL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_one_step_2_divs:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
-; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: f32_one_step_2_divs:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast float 3456.0, %x
%div2 = fdiv fast float %div, %x
ret float %div2
@@ -335,95 +270,83 @@ define float @f32_two_step_2(float %x) #
;
; BDVER2-LABEL: f32_two_step_2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_two_step_2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_two_step_2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_two_step_2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_two_step_2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_two_step_2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_two_step_2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: f32_two_step_2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast float 6789.0, %x
ret float %div
}
@@ -462,70 +385,70 @@ define <4 x float> @v4f32_one_step2(<4 x
;
; BDVER2-LABEL: v4f32_one_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v4f32_one_step2:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %xmm0, %xmm1
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v4f32_one_step2:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %xmm0, %xmm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@@ -567,77 +490,77 @@ define <4 x float> @v4f32_one_step_2_div
;
; BDVER2-LABEL: v4f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
-; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
-; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %xmm0, %xmm1
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v4f32_one_step_2_divs:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %xmm0, %xmm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SKX-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
%div2 = fdiv fast <4 x float> %div, %x
ret <4 x float> %div2
@@ -691,95 +614,83 @@ define <4 x float> @v4f32_two_step2(<4 x
;
; BDVER2-LABEL: v4f32_two_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_two_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v4f32_two_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v4f32_two_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v4f32_two_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %xmm0, %xmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@@ -826,70 +737,70 @@ define <8 x float> @v8f32_one_step2(<8 x
;
; BDVER2-LABEL: v8f32_one_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_one_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v8f32_one_step2:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %ymm0, %ymm1
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v8f32_one_step2:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %ymm0, %ymm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SKX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -940,77 +851,77 @@ define <8 x float> @v8f32_one_step_2_div
;
; BDVER2-LABEL: v8f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
-; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %ymm0, %ymm1
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v8f32_one_step_2_divs:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %ymm0, %ymm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SKX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
%div2 = fdiv fast <8 x float> %div, %x
ret <8 x float> %div2
@@ -1078,95 +989,83 @@ define <8 x float> @v8f32_two_step2(<8 x
;
; BDVER2-LABEL: v8f32_two_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_two_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm1, %ymm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v8f32_two_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_two_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v8f32_two_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %ymm0, %ymm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %ymm1, %ymm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; AVX512-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -1178,50 +1077,10 @@ define <8 x float> @v8f32_no_step(<8 x f
; SSE-NEXT: rcpps %xmm1, %xmm1
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: v8f32_no_step:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: v8f32_no_step:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: v8f32_no_step:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: v8f32_no_step:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: v8f32_no_step:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: v8f32_no_step:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: v8f32_no_step:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v8f32_no_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_no_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: v8f32_no_step:
+; AVX: # %bb.0:
+; AVX-NEXT: vrcpps %ymm0, %ymm0
+; AVX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -1235,59 +1094,11 @@ define <8 x float> @v8f32_no_step2(<8 x
; SSE-NEXT: mulps {{.*}}(%rip), %xmm1
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: v8f32_no_step2:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: v8f32_no_step2:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: v8f32_no_step2:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: v8f32_no_step2:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: v8f32_no_step2:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: v8f32_no_step2:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v8f32_no_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_no_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: v8f32_no_step2:
+; AVX: # %bb.0:
+; AVX-NEXT: vrcpps %ymm0, %ymm0
+; AVX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -1361,96 +1172,88 @@ define <16 x float> @v16f32_one_step2(<1
;
; BDVER2-LABEL: v16f32_one_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm4
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm4, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm4, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm4, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vrcpps %ymm0, %ymm4
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm4, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vmulps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_one_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vrcpps %ymm0, %ymm4 # sched: [11:2.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_one_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_one_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_one_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_one_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
ret <16 x float> %div
}
@@ -1532,108 +1335,99 @@ define <16 x float> @v16f32_one_step_2_d
;
; BDVER2-LABEL: v16f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [10:2.00]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm4
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; HASWELL-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; HASWELL-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_one_step_2_divs:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [12:0.50]
-; KNL-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_one_step_2_divs:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [11:0.50]
-; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm4
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm4, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm4, %ymm1
+; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm4, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_one_step_2_divs:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1
+; AVX512-NEXT: vmulps %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
%div2 = fdiv fast <16 x float> %div, %x
ret <16 x float> %div2
@@ -1745,138 +1539,126 @@ define <16 x float> @v16f32_two_step2(<1
;
; BDVER2-LABEL: v16f32_two_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_two_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_two_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_two_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_two_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
-; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_two_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
-; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_two_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %zmm1, %zmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
ret <16 x float> %div
}
@@ -1904,43 +1686,38 @@ define <16 x float> @v16f32_no_step(<16
;
; BDVER2-LABEL: v16f32_no_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_no_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0
+; BTVER2-NEXT: vrcpps %ymm1, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_no_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_no_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_no_step:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_no_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_no_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_no_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
@@ -1976,55 +1753,49 @@ define <16 x float> @v16f32_no_step2(<16
;
; BDVER2-LABEL: v16f32_no_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_no_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vrcpps %ymm1, %ymm1
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_no_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm1, %ymm1
+; SANDY-NEXT: vrcpps %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_no_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_no_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_no_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_no_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_no_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm0
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
ret <16 x float> %div
}
Removed: llvm/trunk/test/CodeGen/X86/rtm-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rtm-schedule.ll?rev=353042&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rtm-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rtm-schedule.ll (removed)
@@ -1,62 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=x86-64 -mattr=+rtm | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=CNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=icelake-client | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=ICL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=icelake-server | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=ICL
-
-define i32 @test_xbegin() nounwind uwtable {
-; GENERIC-LABEL: test_xbegin:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xbegin .LBB0_2 # sched: [100:0.33]
-; GENERIC-NEXT: # %bb.1:
-; GENERIC-NEXT: movl $-1, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB0_2:
-; GENERIC-NEXT: # XABORT DEF # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKYLAKE-LABEL: test_xbegin:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xbegin .LBB0_2 # sched: [100:0.25]
-; SKYLAKE-NEXT: # %bb.1:
-; SKYLAKE-NEXT: movl $-1, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-; SKYLAKE-NEXT: .LBB0_2:
-; SKYLAKE-NEXT: # XABORT DEF # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
- %1 = tail call i32 @llvm.x86.xbegin() nounwind
- ret i32 %1
-}
-declare i32 @llvm.x86.xbegin() nounwind
-
-define void @test_xend() nounwind uwtable {
-; GENERIC-LABEL: test_xend:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xend # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKYLAKE-LABEL: test_xend:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xend # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
- tail call void @llvm.x86.xend() nounwind
- ret void
-}
-declare void @llvm.x86.xend() nounwind
-
-define void @test_xabort() nounwind uwtable {
-; GENERIC-LABEL: test_xabort:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xabort $2 # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKYLAKE-LABEL: test_xabort:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xabort $2 # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
- tail call void @llvm.x86.xabort(i8 2)
- ret void
-}
-declare void @llvm.x86.xabort(i8) nounwind
Removed: llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll?rev=353042&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll (removed)
@@ -1,471 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-
-
-; uint64_t lshift10(uint64_t a, uint64_t b)
-; {
-; return (a << 10) | (b >> 54);
-; }
-
-define i64 @lshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
-; GENERIC-LABEL: lshift10_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift10_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [4:3.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift10_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, 10
- %shr = lshr i64 %b, 54
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-define i64 @lshift10(i64 %a, i64 %b) nounwind readnone {
-; GENERIC-LABEL: lshift10:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift10:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: shrq $54, %rsi # sched: [1:0.50]
-; BDVER12-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift10:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shrq $54, %rsi # sched: [1:0.50]
-; BTVER2-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, 10
- %shr = lshr i64 %b, 54
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-; uint64_t rshift10(uint64_t a, uint64_t b)
-; {
-; return (a >> 62) | (b << 2);
-; }
-
-; Should be done via shld
-define i64 @rshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
-; GENERIC-LABEL: rshift10_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift10_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [4:3.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift10_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = lshr i64 %a, 62
- %shr = shl i64 %b, 2
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-; Should be done via lea (x,y,4),z
-define i64 @rshift10(i64 %a, i64 %b) nounwind readnone {
-; GENERIC-LABEL: rshift10:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift10:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: shrq $62, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift10:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: shrq $62, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = lshr i64 %a, 62
- %shr = shl i64 %b, 2
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-;uint64_t lshift(uint64_t a, uint64_t b, uint64_t c)
-;{
-; return (a << c) | (b >> (64-c));
-;}
-
-define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_cl_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_cl_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_cl_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
-; GENERIC-LABEL: lshift_cl:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_cl:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: negb %cl # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shrq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_cl:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: negb %cl # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-
-;uint64_t rshift(uint64_t a, uint64_t b, int c)
-;{
-; return (a >> c) | (b << (64-c));
-;}
-
-define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize {
-; GENERIC-LABEL: rshift_cl_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift_cl_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift_cl_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shr = lshr i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shl = shl i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
-; GENERIC-LABEL: rshift_cl:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift_cl:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: negb %cl # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift_cl:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: negb %cl # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shr = lshr i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shl = shl i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-; extern uint64_t x;
-;void lshift(uint64_t a, uint64_t b, uint_64_t c)
-;{
-; x = (x << c) | (a >> (64-c));
-;}
-@x = global i64 0, align 4
-
-define void @lshift_mem_cl_optsize(i64 %a, i64 %c) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_mem_cl_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_cl_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [4:11.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_cl_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %a, %sub
- %or = or i64 %shl, %shr
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
-; GENERIC-LABEL: lshift_mem_cl:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_cl:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT: negb %cl # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_cl:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: negb %cl # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %a, %sub
- %or = or i64 %shl, %shr
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem(i64 %a) nounwind readnone {
-; GENERIC-LABEL: lshift_mem:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: shrq $54, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: shrq $54, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shlq $10, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_optsize(i64 %a) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_mem_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [4:11.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [9:11.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_b(i64 %b) nounwind readnone {
-; GENERIC-LABEL: lshift_mem_b:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_b:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: shrq $54, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_b:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shrq $54, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %a = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_b_optsize(i64 %b) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_mem_b_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_b_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [4:3.00]
-; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_b_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: shrdq $54, %rdi, %rax # sched: [3:3.00]
-; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %a = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll?rev=353042&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll (removed)
@@ -1,2601 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=i686 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i8 @test_aaa(i8 %a0) optsize {
-; GENERIC-LABEL: test_aaa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aaa
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aaa:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aaa # sched: [13:6.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aaa:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aaa # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aaa:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aaa # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aaa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aaa # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aaa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aaa # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aaa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aaa # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aaa:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aaa # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aaa:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aaa # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aaa:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aaa # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aaa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aaa # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "aaa", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define void @test_aad(i16 %a0) optsize {
-; GENERIC-LABEL: test_aad:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aad
-; GENERIC-NEXT: aad $16
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aad:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aad # sched: [7:3.50]
-; ATOM-NEXT: aad $16 # sched: [7:3.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aad:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aad # sched: [100:1.00]
-; SLM-NEXT: aad $16 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aad:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aad # sched: [100:0.33]
-; SANDY-NEXT: aad $16 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aad:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aad # sched: [100:0.25]
-; HASWELL-NEXT: aad $16 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aad:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aad # sched: [100:0.25]
-; BROADWELL-NEXT: aad $16 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aad:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aad # sched: [100:0.25]
-; SKYLAKE-NEXT: aad $16 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aad:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aad # sched: [100:0.25]
-; SKX-NEXT: aad $16 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aad:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aad # sched: [100:0.50]
-; BDVER2-NEXT: aad $16 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aad:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aad # sched: [100:0.50]
-; BTVER2-NEXT: aad $16 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aad:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aad # sched: [100:0.25]
-; ZNVER1-NEXT: aad $16 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "aad \0A\09 aad $1", "r,i"(i16 %a0, i16 16) nounwind
- ret void
-}
-
-define void @test_aam(i8 %a0) optsize {
-; GENERIC-LABEL: test_aam:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aam
-; GENERIC-NEXT: aam $16
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aam:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aam # sched: [21:10.50]
-; ATOM-NEXT: aam $16 # sched: [21:10.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aam:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aam # sched: [100:1.00]
-; SLM-NEXT: aam $16 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aam:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aam # sched: [100:0.33]
-; SANDY-NEXT: aam $16 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aam:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aam # sched: [100:0.25]
-; HASWELL-NEXT: aam $16 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aam:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aam # sched: [100:0.25]
-; BROADWELL-NEXT: aam $16 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aam:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aam # sched: [100:0.25]
-; SKYLAKE-NEXT: aam $16 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aam:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aam # sched: [100:0.25]
-; SKX-NEXT: aam $16 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aam:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aam # sched: [100:0.50]
-; BDVER2-NEXT: aam $16 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aam:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aam # sched: [100:0.50]
-; BTVER2-NEXT: aam $16 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aam:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aam # sched: [100:0.25]
-; ZNVER1-NEXT: aam $16 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "aam \0A\09 aam $1", "r,i"(i8 %a0, i8 16) nounwind
- ret void
-}
-
-define i8 @test_aas(i8 %a0) optsize {
-; GENERIC-LABEL: test_aas:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aas
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aas:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aas # sched: [13:6.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aas:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aas # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aas:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aas # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aas:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aas # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aas:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aas # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aas:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aas # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aas:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aas # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aas:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aas # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aas:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aas # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aas:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aas # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "aas", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define void @test_arpl(i16 %a0, i16 *%a1) optsize {
-; GENERIC-LABEL: test_arpl:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: arpl %ax, (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_arpl:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: arpl %ax, (%ecx) # sched: [23:11.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_arpl:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: arpl %ax, (%ecx) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_arpl:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: arpl %ax, (%ecx) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_arpl:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_arpl:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_arpl:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_arpl:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_arpl:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_arpl:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_arpl:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "arpl $0, $1", "r,*m"(i16 %a0, i16 *%a1)
- ret void
-}
-
-define void @test_bound(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) optsize {
-; GENERIC-LABEL: test_bound:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pushl %esi
-; GENERIC-NEXT: .cfi_def_cfa_offset 8
-; GENERIC-NEXT: .cfi_offset %esi, -8
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bound %ax, (%esi)
-; GENERIC-NEXT: bound %ecx, (%edx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: popl %esi
-; GENERIC-NEXT: .cfi_def_cfa_offset 4
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_bound:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pushl %esi # sched: [1:1.00]
-; ATOM-NEXT: .cfi_def_cfa_offset 8
-; ATOM-NEXT: .cfi_offset %esi, -8
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bound %ax, (%esi) # sched: [11:5.50]
-; ATOM-NEXT: bound %ecx, (%edx) # sched: [11:5.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: popl %esi # sched: [1:1.00]
-; ATOM-NEXT: .cfi_def_cfa_offset 4
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_bound:
-; SLM: # %bb.0:
-; SLM-NEXT: pushl %esi # sched: [1:1.00]
-; SLM-NEXT: .cfi_def_cfa_offset 8
-; SLM-NEXT: .cfi_offset %esi, -8
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: bound %ax, (%esi) # sched: [100:1.00]
-; SLM-NEXT: bound %ecx, (%edx) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: popl %esi # sched: [3:1.00]
-; SLM-NEXT: .cfi_def_cfa_offset 4
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bound:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pushl %esi # sched: [5:1.00]
-; SANDY-NEXT: .cfi_def_cfa_offset 8
-; SANDY-NEXT: .cfi_offset %esi, -8
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bound %ax, (%esi) # sched: [100:0.33]
-; SANDY-NEXT: bound %ecx, (%edx) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: popl %esi # sched: [6:0.50]
-; SANDY-NEXT: .cfi_def_cfa_offset 4
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_bound:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pushl %esi # sched: [2:1.00]
-; HASWELL-NEXT: .cfi_def_cfa_offset 8
-; HASWELL-NEXT: .cfi_offset %esi, -8
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bound %ax, (%esi) # sched: [1:3.75]
-; HASWELL-NEXT: bound %ecx, (%edx) # sched: [1:3.75]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: popl %esi # sched: [6:0.50]
-; HASWELL-NEXT: .cfi_def_cfa_offset 4
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bound:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pushl %esi # sched: [2:1.00]
-; BROADWELL-NEXT: .cfi_def_cfa_offset 8
-; BROADWELL-NEXT: .cfi_offset %esi, -8
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; BROADWELL-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: popl %esi # sched: [6:0.50]
-; BROADWELL-NEXT: .cfi_def_cfa_offset 4
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_bound:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pushl %esi # sched: [2:1.00]
-; SKYLAKE-NEXT: .cfi_def_cfa_offset 8
-; SKYLAKE-NEXT: .cfi_offset %esi, -8
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; SKYLAKE-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: popl %esi # sched: [6:0.50]
-; SKYLAKE-NEXT: .cfi_def_cfa_offset 4
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_bound:
-; SKX: # %bb.0:
-; SKX-NEXT: pushl %esi # sched: [2:1.00]
-; SKX-NEXT: .cfi_def_cfa_offset 8
-; SKX-NEXT: .cfi_offset %esi, -8
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; SKX-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: popl %esi # sched: [6:0.50]
-; SKX-NEXT: .cfi_def_cfa_offset 4
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_bound:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pushl %esi # sched: [1:1.00]
-; BDVER2-NEXT: .cfi_def_cfa_offset 8
-; BDVER2-NEXT: .cfi_offset %esi, -8
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.50]
-; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: popl %esi # sched: [5:0.50]
-; BDVER2-NEXT: .cfi_def_cfa_offset 4
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bound:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pushl %esi # sched: [1:1.00]
-; BTVER2-NEXT: .cfi_def_cfa_offset 8
-; BTVER2-NEXT: .cfi_offset %esi, -8
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bound %ax, (%esi) # sched: [100:0.50]
-; BTVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: popl %esi # sched: [3:1.00]
-; BTVER2-NEXT: .cfi_def_cfa_offset 4
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bound:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pushl %esi # sched: [1:0.50]
-; ZNVER1-NEXT: .cfi_def_cfa_offset 8
-; ZNVER1-NEXT: .cfi_offset %esi, -8
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; ZNVER1-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: popl %esi # sched: [8:0.50]
-; ZNVER1-NEXT: .cfi_def_cfa_offset 4
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "bound $0, $1 \0A\09 bound $2, $3", "r,*m,r,*m"(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3)
- ret void
-}
-
-; TODO - test_call
-
-define i8 @test_daa(i8 %a0) optsize {
-; GENERIC-LABEL: test_daa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: daa
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_daa:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: daa # sched: [18:9.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_daa:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: daa # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_daa:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: daa # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_daa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: daa # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_daa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: daa # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_daa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: daa # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_daa:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: daa # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_daa:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: daa # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_daa:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: daa # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_daa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: daa # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "daa", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define i8 @test_das(i8 %a0) optsize {
-; GENERIC-LABEL: test_das:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: das
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_das:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: das # sched: [20:10.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_das:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: das # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_das:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: das # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_das:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: das # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_das:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: das # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_das:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: das # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_das:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: das # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_das:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: das # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_das:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: das # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_das:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: das # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "das", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define void @test_dec16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_dec16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decw %ax
-; GENERIC-NEXT: decw (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_dec16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decw %ax # sched: [1:0.50]
-; ATOM-NEXT: decw (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec16:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: decw %ax # sched: [1:0.50]
-; SLM-NEXT: decw (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decw %ax # sched: [1:0.33]
-; SANDY-NEXT: decw (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_dec16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decw %ax # sched: [1:0.25]
-; HASWELL-NEXT: decw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decw %ax # sched: [1:0.25]
-; BROADWELL-NEXT: decw (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_dec16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decw %ax # sched: [1:0.25]
-; SKYLAKE-NEXT: decw (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_dec16:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: decw %ax # sched: [1:0.25]
-; SKX-NEXT: decw (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_dec16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decw %ax # sched: [1:0.50]
-; BDVER2-NEXT: decw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decw %ax # sched: [1:0.50]
-; BTVER2-NEXT: decw (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decw %ax # sched: [1:0.25]
-; ZNVER1-NEXT: decw (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "decw $0 \0A\09 decw $1", "r,*m"(i16 %a0, i16* %a1) nounwind
- ret void
-}
-define void @test_dec32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_dec32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decl %eax
-; GENERIC-NEXT: decl (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_dec32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decl %eax # sched: [1:0.50]
-; ATOM-NEXT: decl (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: decl %eax # sched: [1:0.50]
-; SLM-NEXT: decl (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decl %eax # sched: [1:0.33]
-; SANDY-NEXT: decl (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_dec32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decl %eax # sched: [1:0.25]
-; HASWELL-NEXT: decl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decl %eax # sched: [1:0.25]
-; BROADWELL-NEXT: decl (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_dec32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decl %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: decl (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_dec32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: decl %eax # sched: [1:0.25]
-; SKX-NEXT: decl (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_dec32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decl %eax # sched: [1:0.50]
-; BDVER2-NEXT: decl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decl %eax # sched: [1:0.50]
-; BTVER2-NEXT: decl (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decl %eax # sched: [1:0.25]
-; ZNVER1-NEXT: decl (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "decl $0 \0A\09 decl $1", "r,*m"(i32 %a0, i32* %a1) nounwind
- ret void
-}
-
-define void @test_inc16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_inc16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incw %ax
-; GENERIC-NEXT: incw (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_inc16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incw %ax # sched: [1:0.50]
-; ATOM-NEXT: incw (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc16:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: incw %ax # sched: [1:0.50]
-; SLM-NEXT: incw (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incw %ax # sched: [1:0.33]
-; SANDY-NEXT: incw (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_inc16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incw %ax # sched: [1:0.25]
-; HASWELL-NEXT: incw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incw %ax # sched: [1:0.25]
-; BROADWELL-NEXT: incw (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_inc16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incw %ax # sched: [1:0.25]
-; SKYLAKE-NEXT: incw (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_inc16:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: incw %ax # sched: [1:0.25]
-; SKX-NEXT: incw (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_inc16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incw %ax # sched: [1:0.50]
-; BDVER2-NEXT: incw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incw %ax # sched: [1:0.50]
-; BTVER2-NEXT: incw (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incw %ax # sched: [1:0.25]
-; ZNVER1-NEXT: incw (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "incw $0 \0A\09 incw $1", "r,*m"(i16 %a0, i16* %a1) nounwind
- ret void
-}
-define void @test_inc32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_inc32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incl %eax
-; GENERIC-NEXT: incl (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_inc32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incl %eax # sched: [1:0.50]
-; ATOM-NEXT: incl (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: incl %eax # sched: [1:0.50]
-; SLM-NEXT: incl (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incl %eax # sched: [1:0.33]
-; SANDY-NEXT: incl (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_inc32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incl %eax # sched: [1:0.25]
-; HASWELL-NEXT: incl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incl %eax # sched: [1:0.25]
-; BROADWELL-NEXT: incl (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_inc32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incl %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: incl (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_inc32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: incl %eax # sched: [1:0.25]
-; SKX-NEXT: incl (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_inc32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incl %eax # sched: [1:0.50]
-; BDVER2-NEXT: incl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incl %eax # sched: [1:0.50]
-; BTVER2-NEXT: incl (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incl %eax # sched: [1:0.25]
-; ZNVER1-NEXT: incl (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "incl $0 \0A\09 incl $1", "r,*m"(i32 %a0, i32* %a1) nounwind
- ret void
-}
-
-define void @test_into() optsize {
-; GENERIC-LABEL: test_into:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: into
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_into:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: into # sched: [6:3.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_into:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: into # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_into:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: into # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_into:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: into # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_into:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: into # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_into:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: into # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_into:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: into # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_into:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: into # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_into:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: into # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_into:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: into # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "into", ""()
- ret void
-}
-
-; TODO - test_jmp
-
-define void @test_jcxz_jecxz() optsize {
-; GENERIC-LABEL: test_jcxz_jecxz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: JXTGT:
-; GENERIC-NEXT: jcxz JXTGT
-; GENERIC-NEXT: jecxz JXTGT
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_jcxz_jecxz:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: JXTGT:
-; ATOM-NEXT: jcxz JXTGT # sched: [4:2.00]
-; ATOM-NEXT: jecxz JXTGT # sched: [4:2.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_jcxz_jecxz:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: JXTGT:
-; SLM-NEXT: jcxz JXTGT # sched: [1:1.00]
-; SLM-NEXT: jecxz JXTGT # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_jcxz_jecxz:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: JXTGT:
-; SANDY-NEXT: jcxz JXTGT # sched: [2:1.00]
-; SANDY-NEXT: jecxz JXTGT # sched: [2:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_jcxz_jecxz:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: JXTGT:
-; HASWELL-NEXT: jcxz JXTGT # sched: [2:0.50]
-; HASWELL-NEXT: jecxz JXTGT # sched: [2:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_jcxz_jecxz:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: JXTGT:
-; BROADWELL-NEXT: jcxz JXTGT # sched: [2:0.50]
-; BROADWELL-NEXT: jecxz JXTGT # sched: [2:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_jcxz_jecxz:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: JXTGT:
-; SKYLAKE-NEXT: jcxz JXTGT # sched: [2:0.50]
-; SKYLAKE-NEXT: jecxz JXTGT # sched: [2:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_jcxz_jecxz:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: JXTGT:
-; SKX-NEXT: jcxz JXTGT # sched: [2:0.50]
-; SKX-NEXT: jecxz JXTGT # sched: [2:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_jcxz_jecxz:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: JXTGT:
-; BDVER2-NEXT: jcxz JXTGT # sched: [1:1.00]
-; BDVER2-NEXT: jecxz JXTGT # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_jcxz_jecxz:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: JXTGT:
-; BTVER2-NEXT: jcxz JXTGT # sched: [1:0.50]
-; BTVER2-NEXT: jecxz JXTGT # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_jcxz_jecxz:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: JXTGT:
-; ZNVER1-NEXT: jcxz JXTGT # sched: [1:0.50]
-; ZNVER1-NEXT: jecxz JXTGT # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "JXTGT: \0A\09 jcxz JXTGT \0A\09 jecxz JXTGT", ""()
- ret void
-}
-
-; TODO - test_lds
-
-define void @test_leave() optsize {
-; GENERIC-LABEL: test_leave:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: leave
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_leave:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: leave # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_leave:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: leave # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_leave:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: leave # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_leave:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: leave # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_leave:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: leave # sched: [7:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_leave:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: leave # sched: [7:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_leave:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: leave # sched: [7:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_leave:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: leave # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_leave:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: leave # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_leave:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: leave # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "leave", ""() nounwind
- ret void
-}
-
-; TODO - test_les
-
-define void @test_pop_push() optsize {
-; GENERIC-LABEL: test_pop_push:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popl %ds
-; GENERIC-NEXT: popl %es
-; GENERIC-NEXT: popl %ss
-; GENERIC-NEXT: popl %fs
-; GENERIC-NEXT: popl %gs
-; GENERIC-NEXT: pushl %cs
-; GENERIC-NEXT: pushl %ds
-; GENERIC-NEXT: pushl %es
-; GENERIC-NEXT: pushl %ss
-; GENERIC-NEXT: pushl %fs
-; GENERIC-NEXT: pushl %gs
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_pop_push:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popl %ds # sched: [29:14.50]
-; ATOM-NEXT: popl %es # sched: [29:14.50]
-; ATOM-NEXT: popl %ss # sched: [48:24.00]
-; ATOM-NEXT: popl %fs # sched: [29:14.50]
-; ATOM-NEXT: popl %gs # sched: [29:14.50]
-; ATOM-NEXT: pushl %cs # sched: [2:1.00]
-; ATOM-NEXT: pushl %ds # sched: [2:1.00]
-; ATOM-NEXT: pushl %es # sched: [2:1.00]
-; ATOM-NEXT: pushl %ss # sched: [2:1.00]
-; ATOM-NEXT: pushl %fs # sched: [2:1.00]
-; ATOM-NEXT: pushl %gs # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popl %ds # sched: [100:1.00]
-; SLM-NEXT: popl %es # sched: [100:1.00]
-; SLM-NEXT: popl %ss # sched: [100:1.00]
-; SLM-NEXT: popl %fs # sched: [100:1.00]
-; SLM-NEXT: popl %gs # sched: [100:1.00]
-; SLM-NEXT: pushl %cs # sched: [100:1.00]
-; SLM-NEXT: pushl %ds # sched: [100:1.00]
-; SLM-NEXT: pushl %es # sched: [100:1.00]
-; SLM-NEXT: pushl %ss # sched: [100:1.00]
-; SLM-NEXT: pushl %fs # sched: [100:1.00]
-; SLM-NEXT: pushl %gs # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popl %ds # sched: [100:0.33]
-; SANDY-NEXT: popl %es # sched: [100:0.33]
-; SANDY-NEXT: popl %ss # sched: [100:0.33]
-; SANDY-NEXT: popl %fs # sched: [100:0.33]
-; SANDY-NEXT: popl %gs # sched: [100:0.33]
-; SANDY-NEXT: pushl %cs # sched: [100:0.33]
-; SANDY-NEXT: pushl %ds # sched: [100:0.33]
-; SANDY-NEXT: pushl %es # sched: [100:0.33]
-; SANDY-NEXT: pushl %ss # sched: [100:0.33]
-; SANDY-NEXT: pushl %fs # sched: [100:0.33]
-; SANDY-NEXT: pushl %gs # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_pop_push:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popl %ds # sched: [100:0.25]
-; HASWELL-NEXT: popl %es # sched: [100:0.25]
-; HASWELL-NEXT: popl %ss # sched: [100:0.25]
-; HASWELL-NEXT: popl %fs # sched: [100:0.25]
-; HASWELL-NEXT: popl %gs # sched: [100:0.25]
-; HASWELL-NEXT: pushl %cs # sched: [100:0.25]
-; HASWELL-NEXT: pushl %ds # sched: [100:0.25]
-; HASWELL-NEXT: pushl %es # sched: [100:0.25]
-; HASWELL-NEXT: pushl %ss # sched: [100:0.25]
-; HASWELL-NEXT: pushl %fs # sched: [100:0.25]
-; HASWELL-NEXT: pushl %gs # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popl %ds # sched: [100:0.25]
-; BROADWELL-NEXT: popl %es # sched: [100:0.25]
-; BROADWELL-NEXT: popl %ss # sched: [100:0.25]
-; BROADWELL-NEXT: popl %fs # sched: [100:0.25]
-; BROADWELL-NEXT: popl %gs # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %cs # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %ds # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %es # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %ss # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %fs # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %gs # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_pop_push:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popl %ds # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %es # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %ss # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %fs # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %gs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %cs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %ds # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %es # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %ss # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %fs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %gs # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_pop_push:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popl %ds # sched: [100:0.25]
-; SKX-NEXT: popl %es # sched: [100:0.25]
-; SKX-NEXT: popl %ss # sched: [100:0.25]
-; SKX-NEXT: popl %fs # sched: [100:0.25]
-; SKX-NEXT: popl %gs # sched: [100:0.25]
-; SKX-NEXT: pushl %cs # sched: [100:0.25]
-; SKX-NEXT: pushl %ds # sched: [100:0.25]
-; SKX-NEXT: pushl %es # sched: [100:0.25]
-; SKX-NEXT: pushl %ss # sched: [100:0.25]
-; SKX-NEXT: pushl %fs # sched: [100:0.25]
-; SKX-NEXT: pushl %gs # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_pop_push:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popl %ds # sched: [100:0.50]
-; BDVER2-NEXT: popl %es # sched: [100:0.50]
-; BDVER2-NEXT: popl %ss # sched: [100:0.50]
-; BDVER2-NEXT: popl %fs # sched: [100:0.50]
-; BDVER2-NEXT: popl %gs # sched: [100:0.50]
-; BDVER2-NEXT: pushl %cs # sched: [100:0.50]
-; BDVER2-NEXT: pushl %ds # sched: [100:0.50]
-; BDVER2-NEXT: pushl %es # sched: [100:0.50]
-; BDVER2-NEXT: pushl %ss # sched: [100:0.50]
-; BDVER2-NEXT: pushl %fs # sched: [100:0.50]
-; BDVER2-NEXT: pushl %gs # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popl %ds # sched: [100:0.50]
-; BTVER2-NEXT: popl %es # sched: [100:0.50]
-; BTVER2-NEXT: popl %ss # sched: [100:0.50]
-; BTVER2-NEXT: popl %fs # sched: [100:0.50]
-; BTVER2-NEXT: popl %gs # sched: [100:0.50]
-; BTVER2-NEXT: pushl %cs # sched: [100:0.50]
-; BTVER2-NEXT: pushl %ds # sched: [100:0.50]
-; BTVER2-NEXT: pushl %es # sched: [100:0.50]
-; BTVER2-NEXT: pushl %ss # sched: [100:0.50]
-; BTVER2-NEXT: pushl %fs # sched: [100:0.50]
-; BTVER2-NEXT: pushl %gs # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popl %ds # sched: [100:0.25]
-; ZNVER1-NEXT: popl %es # sched: [100:0.25]
-; ZNVER1-NEXT: popl %ss # sched: [100:0.25]
-; ZNVER1-NEXT: popl %fs # sched: [100:0.25]
-; ZNVER1-NEXT: popl %gs # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %cs # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %ds # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %es # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %ss # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %fs # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %gs # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "pop %DS \0A\09 pop %ES \0A\09 pop %SS \0A\09 pop %FS \0A\09 pop %GS \0A\09 push %CS \0A\09 push %DS \0A\09 push %ES \0A\09 push %SS \0A\09 push %FS \0A\09 push %GS", ""()
- ret void
-}
-define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
-; GENERIC-LABEL: test_pop_push_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popw %ax
-; GENERIC-NEXT: popw (%ecx)
-; GENERIC-NEXT: pushw %ax
-; GENERIC-NEXT: pushw (%ecx)
-; GENERIC-NEXT: pushw $4095 # imm = 0xFFF
-; GENERIC-NEXT: pushw $7
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_pop_push_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popw %ax # sched: [2:1.00]
-; ATOM-NEXT: popw (%ecx) # sched: [3:1.50]
-; ATOM-NEXT: pushw %ax # sched: [1:1.00]
-; ATOM-NEXT: pushw (%ecx) # sched: [2:1.00]
-; ATOM-NEXT: pushw $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: pushw $7 # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push_16:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: popw %ax # sched: [3:1.00]
-; SLM-NEXT: popw (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushw %ax # sched: [1:1.00]
-; SLM-NEXT: pushw (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushw $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [1:1.00]
-; SLM-NEXT: pushw $7 # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popw %ax # sched: [6:0.50]
-; SANDY-NEXT: popw (%ecx) # sched: [6:0.50]
-; SANDY-NEXT: pushw %ax # sched: [5:1.00]
-; SANDY-NEXT: pushw (%ecx) # sched: [5:1.00]
-; SANDY-NEXT: pushw $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [1:1.00]
-; SANDY-NEXT: pushw $7 # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_pop_push_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popw %ax # sched: [6:0.50]
-; HASWELL-NEXT: popw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushw %ax # sched: [2:1.00]
-; HASWELL-NEXT: pushw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushw $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:1.00]
-; HASWELL-NEXT: pushw $7 # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popw %ax # sched: [6:0.50]
-; BROADWELL-NEXT: popw (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushw %ax # sched: [2:1.00]
-; BROADWELL-NEXT: pushw (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushw $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [1:1.00]
-; BROADWELL-NEXT: pushw $7 # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_pop_push_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popw %ax # sched: [6:0.50]
-; SKYLAKE-NEXT: popw (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushw %ax # sched: [2:1.00]
-; SKYLAKE-NEXT: pushw (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushw $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [1:1.00]
-; SKYLAKE-NEXT: pushw $7 # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_pop_push_16:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: popw %ax # sched: [6:0.50]
-; SKX-NEXT: popw (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushw %ax # sched: [2:1.00]
-; SKX-NEXT: pushw (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushw $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [1:1.00]
-; SKX-NEXT: pushw $7 # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_pop_push_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popw %ax # sched: [5:0.50]
-; BDVER2-NEXT: popw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushw %ax # sched: [1:1.00]
-; BDVER2-NEXT: pushw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popw %ax # sched: [3:1.00]
-; BTVER2-NEXT: popw (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushw %ax # sched: [1:1.00]
-; BTVER2-NEXT: pushw (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popw %ax # sched: [8:0.50]
-; ZNVER1-NEXT: popw (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: pushw %ax # sched: [1:0.50]
-; ZNVER1-NEXT: pushw (%ecx) # sched: [4:0.50]
-; ZNVER1-NEXT: pushw $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [1:0.50]
-; ZNVER1-NEXT: pushw $7 # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = call i16 asm sideeffect "popw $0 \0A\09 popw $2 \0A\09 pushw $1 \0A\09 pushw $2 \0A\09 pushw $3 \0A\09 pushw $4", "=r,r,*m,i,i"(i16 %a0, i16 *%a1, i16 4095, i8 7)
- ret i16 %1
-}
-define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_pop_push_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popl %eax
-; GENERIC-NEXT: popl (%ecx)
-; GENERIC-NEXT: pushl %eax
-; GENERIC-NEXT: pushl (%ecx)
-; GENERIC-NEXT: pushl $4095 # imm = 0xFFF
-; GENERIC-NEXT: pushl $7
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_pop_push_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popl %eax # sched: [1:1.00]
-; ATOM-NEXT: popl (%ecx) # sched: [3:1.50]
-; ATOM-NEXT: pushl %eax # sched: [1:1.00]
-; ATOM-NEXT: pushl (%ecx) # sched: [2:1.00]
-; ATOM-NEXT: pushl $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: pushl $7 # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push_32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: popl %eax # sched: [3:1.00]
-; SLM-NEXT: popl (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushl %eax # sched: [1:1.00]
-; SLM-NEXT: pushl (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushl $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [1:1.00]
-; SLM-NEXT: pushl $7 # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popl %eax # sched: [6:0.50]
-; SANDY-NEXT: popl (%ecx) # sched: [6:0.50]
-; SANDY-NEXT: pushl %eax # sched: [5:1.00]
-; SANDY-NEXT: pushl (%ecx) # sched: [5:1.00]
-; SANDY-NEXT: pushl $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [1:1.00]
-; SANDY-NEXT: pushl $7 # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_pop_push_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popl %eax # sched: [6:0.50]
-; HASWELL-NEXT: popl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushl %eax # sched: [2:1.00]
-; HASWELL-NEXT: pushl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushl $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:1.00]
-; HASWELL-NEXT: pushl $7 # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popl %eax # sched: [6:0.50]
-; BROADWELL-NEXT: popl (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushl %eax # sched: [2:1.00]
-; BROADWELL-NEXT: pushl (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushl $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [1:1.00]
-; BROADWELL-NEXT: pushl $7 # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_pop_push_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popl %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: popl (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushl %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: pushl (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushl $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [1:1.00]
-; SKYLAKE-NEXT: pushl $7 # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_pop_push_32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: popl %eax # sched: [6:0.50]
-; SKX-NEXT: popl (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushl %eax # sched: [2:1.00]
-; SKX-NEXT: pushl (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushl $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [1:1.00]
-; SKX-NEXT: pushl $7 # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_pop_push_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popl %eax # sched: [5:0.50]
-; BDVER2-NEXT: popl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushl %eax # sched: [1:1.00]
-; BDVER2-NEXT: pushl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushl $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: pushl $7 # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popl %eax # sched: [3:1.00]
-; BTVER2-NEXT: popl (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushl %eax # sched: [1:1.00]
-; BTVER2-NEXT: pushl (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushl $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: pushl $7 # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popl %eax # sched: [8:0.50]
-; ZNVER1-NEXT: popl (%ecx) # sched: [9:1.00]
-; ZNVER1-NEXT: pushl %eax # sched: [1:0.50]
-; ZNVER1-NEXT: pushl (%ecx) # sched: [4:0.50]
-; ZNVER1-NEXT: pushl $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [1:0.50]
-; ZNVER1-NEXT: pushl $7 # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = call i32 asm sideeffect "popl $0 \0A\09 popl $2 \0A\09 pushl $1 \0A\09 pushl $2 \0A\09 pushl $3 \0A\09 pushl $4", "=r,r,*m,i,i"(i32 %a0, i32 *%a1, i32 4095, i8 7)
- ret i32 %1
-}
-
-define void @test_popa_popf_pusha_pushf() optsize {
-; GENERIC-LABEL: test_popa_popf_pusha_pushf:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popal
-; GENERIC-NEXT: popfl
-; GENERIC-NEXT: pushal
-; GENERIC-NEXT: pushfl
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_popa_popf_pusha_pushf:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popal # sched: [9:4.50]
-; ATOM-NEXT: popfl # sched: [26:13.00]
-; ATOM-NEXT: pushal # sched: [8:4.00]
-; ATOM-NEXT: pushfl # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_popa_popf_pusha_pushf:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popal # sched: [3:1.00]
-; SLM-NEXT: popfl # sched: [3:1.00]
-; SLM-NEXT: pushal # sched: [1:1.00]
-; SLM-NEXT: pushfl # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_popa_popf_pusha_pushf:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popal # sched: [5:0.50]
-; SANDY-NEXT: popfl # sched: [5:0.50]
-; SANDY-NEXT: pushal # sched: [1:1.00]
-; SANDY-NEXT: pushfl # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_popa_popf_pusha_pushf:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popal # sched: [1:4.50]
-; HASWELL-NEXT: popfl # sched: [5:0.50]
-; HASWELL-NEXT: pushal # sched: [1:4.75]
-; HASWELL-NEXT: pushfl # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_popa_popf_pusha_pushf:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popal # sched: [5:0.50]
-; BROADWELL-NEXT: popfl # sched: [5:0.50]
-; BROADWELL-NEXT: pushal # sched: [1:1.00]
-; BROADWELL-NEXT: pushfl # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_popa_popf_pusha_pushf:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popal # sched: [5:0.50]
-; SKYLAKE-NEXT: popfl # sched: [5:0.50]
-; SKYLAKE-NEXT: pushal # sched: [1:1.00]
-; SKYLAKE-NEXT: pushfl # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_popa_popf_pusha_pushf:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popal # sched: [5:0.50]
-; SKX-NEXT: popfl # sched: [5:0.50]
-; SKX-NEXT: pushal # sched: [1:1.00]
-; SKX-NEXT: pushfl # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_popa_popf_pusha_pushf:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popal # sched: [5:0.50]
-; BDVER2-NEXT: popfl # sched: [5:0.50]
-; BDVER2-NEXT: pushal # sched: [1:1.00]
-; BDVER2-NEXT: pushfl # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_popa_popf_pusha_pushf:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popal # sched: [3:1.00]
-; BTVER2-NEXT: popfl # sched: [3:1.00]
-; BTVER2-NEXT: pushal # sched: [1:1.00]
-; BTVER2-NEXT: pushfl # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_popa_popf_pusha_pushf:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popal # sched: [100:0.25]
-; ZNVER1-NEXT: popfl # sched: [100:0.25]
-; ZNVER1-NEXT: pushal # sched: [8:0.50]
-; ZNVER1-NEXT: pushfl # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "popa \0A\09 popf \0A\09 pusha \0A\09 pushf", ""()
- ret void
-}
-
-define void @test_ret() optsize {
-; GENERIC-LABEL: test_ret:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: retl
-; GENERIC-NEXT: retl $4095 # imm = 0xFFF
-; GENERIC-NEXT: lretl
-; GENERIC-NEXT: lretl $4095 # imm = 0xFFF
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_ret:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-; ATOM-NEXT: retl $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: lretl # sched: [79:39.50]
-; ATOM-NEXT: lretl $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [79:39.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_ret:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: retl # sched: [4:1.00]
-; SLM-NEXT: retl $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: lretl # sched: [4:1.00]
-; SLM-NEXT: lretl $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ret:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-; SANDY-NEXT: retl $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [6:1.00]
-; SANDY-NEXT: lretl # sched: [6:1.00]
-; SANDY-NEXT: lretl $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_ret:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-; HASWELL-NEXT: retl $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:2.00]
-; HASWELL-NEXT: lretl # sched: [6:0.50]
-; HASWELL-NEXT: lretl $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ret:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-; BROADWELL-NEXT: retl $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: lretl # sched: [6:0.50]
-; BROADWELL-NEXT: lretl $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_ret:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-; SKYLAKE-NEXT: retl $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: lretl # sched: [6:0.50]
-; SKYLAKE-NEXT: lretl $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_ret:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: retl # sched: [6:0.50]
-; SKX-NEXT: retl $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: lretl # sched: [6:0.50]
-; SKX-NEXT: lretl $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_ret:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-; BDVER2-NEXT: retl $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [5:1.00]
-; BDVER2-NEXT: lretl # sched: [5:1.00]
-; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ret:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-; BTVER2-NEXT: retl $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: lretl # sched: [4:1.00]
-; BTVER2-NEXT: lretl $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ret:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
-; ZNVER1-NEXT: retl $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: lretl # sched: [1:0.50]
-; ZNVER1-NEXT: lretl $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "ret \0A\09 ret $0 \0A\09 lret \0A\09 lret $0", "i"(i16 4095)
- ret void
-}
-
-define i8 @test_salc() optsize {
-; GENERIC-LABEL: test_salc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: salc
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_salc:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: salc # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_salc:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: salc # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_salc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: salc # sched: [1:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_salc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: salc # sched: [1:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_salc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: salc # sched: [1:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_salc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: salc # sched: [1:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_salc:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: salc # sched: [1:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_salc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: salc # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_salc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: salc # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_salc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: salc # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "salc", "=r"() nounwind
- ret i8 %1
-}
-
-; TODO - test_sgdt
-; TODO - test_sidt
-
-define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_xchg_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xchgl %eax, %eax
-; GENERIC-NEXT: xchgl %ecx, %eax
-; GENERIC-NEXT: xchgl %eax, (%edx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_xchg_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xchgl %eax, %eax # sched: [2:1.00]
-; ATOM-NEXT: xchgl %ecx, %eax # sched: [2:1.00]
-; ATOM-NEXT: xchgl %eax, (%edx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_xchg_32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: xchgl %eax, %eax # sched: [1:0.50]
-; SLM-NEXT: xchgl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: xchgl %eax, (%edx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xchg_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xchgl %eax, %eax # sched: [2:1.00]
-; SANDY-NEXT: xchgl %ecx, %eax # sched: [2:1.00]
-; SANDY-NEXT: xchgl %eax, (%edx) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_xchg_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; HASWELL-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; HASWELL-NEXT: xchgl %eax, (%edx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xchg_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; BROADWELL-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; BROADWELL-NEXT: xchgl %eax, (%edx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_xchg_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgl %eax, (%edx) # sched: [10:1.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_xchg_32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; SKX-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; SKX-NEXT: xchgl %eax, (%edx) # sched: [10:1.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_xchg_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xchgl %eax, %eax # sched: [1:1.00]
-; BDVER2-NEXT: xchgl %ecx, %eax # sched: [1:1.00]
-; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xchg_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xchgl %eax, %eax # sched: [1:0.50]
-; BTVER2-NEXT: xchgl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: xchgl %eax, (%edx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xchg_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xchgl %eax, %eax # sched: [1:0.50]
-; ZNVER1-NEXT: xchgl %ecx, %eax # sched: [1:0.50]
-; ZNVER1-NEXT: xchgl %eax, (%edx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "xchg %EAX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind
- ret void
-}
More information about the llvm-commits
mailing list