[llvm] r290564 - [AVX-512] Add isel patterns to turn native masked scalar add/sub/mul/div into masked instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 26 17:56:25 PST 2016
Author: ctopper
Date: Mon Dec 26 19:56:24 2016
New Revision: 290564
URL: http://llvm.org/viewvc/llvm-project?rev=290564&view=rev
Log:
[AVX-512] Add isel patterns to turn native masked scalar add/sub/mul/div into masked instructions.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=290564&r1=290563&r2=290564&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Dec 26 19:56:24 2016
@@ -9117,6 +9117,17 @@ multiclass AVX512_scalar_math_f32_patter
def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
(Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
+
+ // extracted masked scalar math op with insert via movss
+ def : Pat<(X86Movss (v4f32 VR128X:$src1),
+ (scalar_to_vector
+ (X86selects VK1WM:$mask,
+ (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
+ FR32X:$src2),
+ FR32X:$src0))),
+ (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
+ VK1WM:$mask, v4f32:$src1,
+ (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
}
}
@@ -9150,6 +9161,17 @@ multiclass AVX512_scalar_math_f64_patter
def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
(Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
+
+ // extracted masked scalar math op with insert via movsd
+ def : Pat<(X86Movsd (v2f64 VR128X:$src1),
+ (scalar_to_vector
+ (X86selects VK1WM:$mask,
+ (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
+ FR64X:$src2),
+ FR64X:$src0))),
+ (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
+ VK1WM:$mask, v2f64:$src1,
+ (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
}
}
Modified: llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll?rev=290564&r1=290563&r2=290564&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll Mon Dec 26 19:56:24 2016
@@ -1119,9 +1119,9 @@ define <4 x float> @add_ss_mask(<4 x flo
;
; AVX512-LABEL: add_ss_mask:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%1 = extractelement <4 x float> %a, i64 0
@@ -1174,9 +1174,9 @@ define <2 x double> @add_sd_mask(<2 x do
;
; AVX512-LABEL: add_sd_mask:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%1 = extractelement <2 x double> %a, i64 0
More information about the llvm-commits
mailing list