[llvm] r337119 - [X86] Add some optsize patterns for 256-bit X86vzmovl.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 14 23:03:20 PDT 2018
Author: ctopper
Date: Sat Jul 14 23:03:19 2018
New Revision: 337119
URL: http://llvm.org/viewvc/llvm-project?rev=337119&view=rev
Log:
[X86] Add some optsize patterns for 256-bit X86vzmovl.
These patterns use VMOVSS/SD. Without optsize we use BLENDI instead.
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=337119&r1=337118&r2=337119&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Jul 14 23:03:19 2018
@@ -304,6 +304,25 @@ let Predicates = [UseAVX, OptForSize] in
(VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
+
+ // Move low f32 and clear high bits: VMOVSS the low xmm subregister against a zero vector (V_SET0), then widen back to 256 bits with SUBREG_TO_REG.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
+
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), // Move low f64 and clear high bits.
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
}
let Predicates = [UseSSE1] in {
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=337119&r1=337118&r2=337119&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Sat Jul 14 23:03:19 2018
@@ -1882,3 +1882,67 @@ entry:
%add = add <4 x i64> %shuffle, %shuffle1
ret <4 x i64> %add
}
+
+define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v4f64_0zzz_optsize:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0zzz_optsize:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512VL-NEXT: retq
+ %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; element 0 from %a, elements 1-3 from the zero vector
+ ret <4 x double> %b
+}
+
+define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v4i64_0zzz_optsize:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0zzz_optsize:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512VL-NEXT: retq
+ %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; element 0 from %a, elements 1-3 from the zero vector
+ ret <4 x i64> %b
+}
+
+define <8 x float> @shuffle_v8f32_0zzzzzzz_optsize(<8 x float> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512VL-NEXT: retq
+ %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; element 0 from %a, elements 1-7 from the zero vector
+ ret <8 x float> %b
+}
+
+define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512VL-NEXT: retq
+ %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; element 0 from %a, elements 1-7 from the zero vector
+ ret <8 x i32> %b
+}
More information about the llvm-commits
mailing list