[llvm] r317441 - [X86] Remove some more RCP and RSQRT patterns from InstrAVX512.td that I missed in r317413.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 5 13:14:06 PST 2017
Author: ctopper
Date: Sun Nov 5 13:14:05 2017
New Revision: 317441
URL: http://llvm.org/viewvc/llvm-project?rev=317441&view=rev
Log:
[X86] Remove some more RCP and RSQRT patterns from InstrAVX512.td that I missed in r317413.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=317441&r1=317440&r2=317441&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Nov 5 13:14:05 2017
@@ -7641,19 +7641,6 @@ defm VSQRT : avx512_sqrt_packed_all<0x
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
-let Predicates = [HasAVX512] in {
- def : Pat<(f32 (X86frsqrt FR32X:$src)),
- (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
- def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
- Requires<[OptForSize]>;
- def : Pat<(f32 (X86frcp FR32X:$src)),
- (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
- def : Pat<(f32 (X86frcp (load addr:$src))),
- (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
- Requires<[OptForSize]>;
-}
-
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=317441&r1=317440&r2=317441&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Nov 5 13:14:05 2017
@@ -3095,7 +3095,7 @@ multiclass avx_fp_unop_s<bits<8> opc, st
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop,
Intrinsic Intr, SDNode OpNode, Domain d,
- OpndItins itins, string Suffix> {
+ OpndItins itins, Predicate target, string Suffix> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3126,7 +3126,7 @@ multiclass avx_fp_unop_s<bits<8> opc, st
// vrcpss mem, %xmm0, %xmm0
// TODO: In theory, we could fold the load, and avoid the stall caused by
// the partial register store, either in ExecutionDepsFix or with smarter RA.
- let Predicates = [UseAVX] in {
+ let Predicates = [target] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
}
@@ -3140,7 +3140,7 @@ multiclass avx_fp_unop_s<bits<8> opc, st
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
}
- let Predicates = [UseAVX, OptForSize] in {
+ let Predicates = [target, OptForSize] in {
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
@@ -3220,40 +3220,40 @@ let Predicates = [HasAVX, NoVLX] in {
}
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
+ OpndItins itins, Predicate AVXTarget> {
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG,
- NotMemoryFoldable;
+ SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V,
+ VEX_LIG, VEX_WIG, NotMemoryFoldable;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
+ OpndItins itins, Predicate AVXTarget> {
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, SSEPackedDouble, itins, "SD">,
+ OpNode, SSEPackedDouble, itins, AVXTarget, "SD">,
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
}
// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS, UseAVX>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX, NoVLX]>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD, UseAVX>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS, HasAVX>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX]>;
-defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS, HasAVX>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX]>;
// There is no f64 version of the reciprocal approximation instructions.
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=317441&r1=317440&r2=317441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Sun Nov 5 13:14:05 2017
@@ -144,14 +144,14 @@ define float @f32_one_step(float %x) #1
;
; KNL-LABEL: f32_one_step:
; KNL: # BB#0:
-; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
+; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: f32_one_step:
; SKX: # BB#0:
-; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50]
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -257,7 +257,7 @@ define float @f32_two_step(float %x) #2
;
; KNL-LABEL: f32_two_step:
; KNL: # BB#0:
-; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
+; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
@@ -268,7 +268,7 @@ define float @f32_two_step(float %x) #2
;
; SKX-LABEL: f32_two_step:
; SKX: # BB#0:
-; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33]
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=317441&r1=317440&r2=317441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Sun Nov 5 13:14:05 2017
@@ -56,13 +56,13 @@ define float @f32_no_step_2(float %x) #3
;
; KNL-LABEL: f32_no_step_2:
; KNL: # BB#0:
-; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
+; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: f32_no_step_2:
; SKX: # BB#0:
-; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 1234.0, %x
@@ -144,7 +144,7 @@ define float @f32_one_step_2(float %x) #
;
; KNL-LABEL: f32_one_step_2:
; KNL: # BB#0:
-; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
+; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
@@ -152,7 +152,7 @@ define float @f32_one_step_2(float %x) #
;
; SKX-LABEL: f32_one_step_2:
; SKX: # BB#0:
-; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50]
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
@@ -243,7 +243,7 @@ define float @f32_one_step_2_divs(float
;
; KNL-LABEL: f32_one_step_2_divs:
; KNL: # BB#0:
-; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
+; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
@@ -252,7 +252,7 @@ define float @f32_one_step_2_divs(float
;
; SKX-LABEL: f32_one_step_2_divs:
; SKX: # BB#0:
-; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50]
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
@@ -368,7 +368,7 @@ define float @f32_two_step_2(float %x) #
;
; KNL-LABEL: f32_two_step_2:
; KNL: # BB#0:
-; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
+; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
@@ -380,7 +380,7 @@ define float @f32_two_step_2(float %x) #
;
; SKX-LABEL: f32_two_step_2:
; SKX: # BB#0:
-; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
+; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33]
More information about the llvm-commits
mailing list