[llvm] r315802 - [X86] Add patterns for vzmovl+cvtpd2dq/cvttpd2dq with a load.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 14 00:04:48 PDT 2017


Author: ctopper
Date: Sat Oct 14 00:04:48 2017
New Revision: 315802

URL: http://llvm.org/viewvc/llvm-project?rev=315802&view=rev
Log:
[X86] Add patterns for vzmovl+cvtpd2dq/cvttpd2dq with a load.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=315802&r1=315801&r2=315802&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Oct 14 00:04:48 2017
@@ -7032,12 +7032,18 @@ let Predicates = [HasAVX512, HasVLX] in
                                 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
               (VCVTPD2DQZ128rr VR128X:$src)>;
     def : Pat<(X86vzmovl (v2i64 (bitconvert
+                                (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+              (VCVTPD2DQZ128rm addr:$src)>;
+    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                  (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
               (VCVTPD2UDQZ128rr VR128X:$src)>;
     def : Pat<(X86vzmovl (v2i64 (bitconvert
                                 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
               (VCVTTPD2DQZ128rr VR128X:$src)>;
     def : Pat<(X86vzmovl (v2i64 (bitconvert
+                                (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+              (VCVTTPD2DQZ128rm addr:$src)>;
+    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                  (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
               (VCVTTPD2UDQZ128rr VR128X:$src)>;
   }

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=315802&r1=315801&r2=315802&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Oct 14 00:04:48 2017
@@ -1890,8 +1890,14 @@ let Predicates = [HasAVX, NoVLX] in {
                                  (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
               (VCVTPD2DQrr VR128:$src)>;
     def : Pat<(X86vzmovl (v2i64 (bitconvert
+                                 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+              (VCVTPD2DQrm addr:$src)>;
+    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                  (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
               (VCVTTPD2DQrr VR128:$src)>;
+    def : Pat<(X86vzmovl (v2i64 (bitconvert
+                                 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+              (VCVTTPD2DQrm addr:$src)>;
   }
 } // Predicates = [HasAVX]
 
@@ -1912,8 +1918,14 @@ let Predicates = [UseSSE2] in {
                                  (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
               (CVTPD2DQrr VR128:$src)>;
     def : Pat<(X86vzmovl (v2i64 (bitconvert
+                                 (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
+              (CVTPD2DQrm addr:$src)>;
+    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                  (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
               (CVTTPD2DQrr VR128:$src)>;
+    def : Pat<(X86vzmovl (v2i64 (bitconvert
+                                 (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
+              (CVTTPD2DQrm addr:$src)>;
   }
 } // Predicates = [UseSSE2]
 
@@ -2071,7 +2083,7 @@ let Predicates = [UseSSE2] in {
                                  (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
               (CVTPD2PSrr VR128:$src)>;
     def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
+                                 (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
               (CVTPD2PSrm addr:$src)>;
   }
 }

Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=315802&r1=315801&r2=315802&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sat Oct 14 00:04:48 2017
@@ -273,6 +273,32 @@ define <2 x i64> @test_mm_cvtpd_epi32_ze
 }
 
 
+define <2 x i64> @test_mm_cvtpd_epi32_zext_load(<2 x double>* %p0) nounwind {
+; SSE-LABEL: test_mm_cvtpd_epi32_zext_load:
+; SSE:       ## BB#0:
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE-NEXT:    cvtpd2dq (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xe6,0x00]
+; SSE-NEXT:    retl ## encoding: [0xc3]
+;
+; AVX2-LABEL: test_mm_cvtpd_epi32_zext_load:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; AVX2-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xe6,0x00]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_mm_cvtpd_epi32_zext_load:
+; SKX:       ## BB#0:
+; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SKX-NEXT:    vcvtpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0x00]
+; SKX-NEXT:    retl ## encoding: [0xc3]
+  %a0 = load <2 x double>, <2 x double>* %p0
+  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
+  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %bc = bitcast <4 x i32> %res to <2 x i64>
+  ret <2 x i64> %bc
+}
+
+
 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvtpd2ps:
 ; SSE:       ## BB#0:
@@ -542,6 +568,32 @@ define <2 x i64> @test_mm_cvttpd_epi32_z
 }
 
 
+define <2 x i64> @test_mm_cvttpd_epi32_zext_load(<2 x double>* %p0) nounwind {
+; SSE-LABEL: test_mm_cvttpd_epi32_zext_load:
+; SSE:       ## BB#0:
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE-NEXT:    cvttpd2dq (%eax), %xmm0 ## encoding: [0x66,0x0f,0xe6,0x00]
+; SSE-NEXT:    retl ## encoding: [0xc3]
+;
+; AVX2-LABEL: test_mm_cvttpd_epi32_zext_load:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; AVX2-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## encoding: [0xc5,0xf9,0xe6,0x00]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_mm_cvttpd_epi32_zext_load:
+; SKX:       ## BB#0:
+; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SKX-NEXT:    vcvttpd2dqx (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0x00]
+; SKX-NEXT:    retl ## encoding: [0xc3]
+  %a0 = load <2 x double>, <2 x double>* %p0
+  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
+  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %bc = bitcast <4 x i32> %res to <2 x i64>
+  ret <2 x i64> %bc
+}
+
+
 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvttps2dq:
 ; SSE:       ## BB#0:
@@ -710,21 +762,21 @@ define <8 x i16> @test_x86_sse2_packssdw
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI33_0, kind: FK_Data_4
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI35_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packssdw_128_fold:
 ; AVX2:       ## BB#0:
 ; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packssdw_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vmovaps LCPI33_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
+; SKX-NEXT:    vmovaps LCPI35_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4
+; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
   ret <8 x i16> %res
@@ -757,21 +809,21 @@ define <16 x i8> @test_x86_sse2_packsswb
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI35_0, kind: FK_Data_4
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI37_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packsswb_128_fold:
 ; AVX2:       ## BB#0:
 ; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packsswb_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vmovaps LCPI35_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; SKX-NEXT:    vmovaps LCPI37_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
+; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
   ret <16 x i8> %res
@@ -804,21 +856,21 @@ define <16 x i8> @test_x86_sse2_packuswb
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI37_0, kind: FK_Data_4
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI39_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packuswb_128_fold:
 ; AVX2:       ## BB#0:
 ; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI39_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packuswb_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vmovaps LCPI37_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; SKX-NEXT:    vmovaps LCPI39_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI37_0, kind: FK_Data_4
+; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI39_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
   ret <16 x i8> %res




More information about the llvm-commits mailing list