[llvm] r346916 - [X86] Don't mark SEXTLOADS with narrow types as Custom with -x86-experimental-vector-widening-legalization.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 14 16:21:41 PST 2018


Author: ctopper
Date: Wed Nov 14 16:21:41 2018
New Revision: 346916

URL: http://llvm.org/viewvc/llvm-project?rev=346916&view=rev
Log:
[X86] Don't mark SEXTLOADS with narrow types as Custom with -x86-experimental-vector-widening-legalization.

The narrow result types end up requesting widening, but generic legalization ends up scalarizing the load and using a build_vector to do the widening.
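
For context, a minimal IR sketch of the kind of narrow sign-extending load this change affects, adapted from the sext_2i8_to_2i32 test updated below (the trailing add mirrors the paddd in that test):

define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
  ; Load two i8 elements and sign-extend to <2 x i32>. With
  ; -x86-experimental-vector-widening-legalization the <2 x i32> result is
  ; widened to <4 x i32>, so marking a Custom SEXTLOAD with a narrow result
  ; type here would just send generic legalization down the scalarizing path.
  %x = load <2 x i8>, <2 x i8>* %addr, align 1
  %y = sext <2 x i8> %x to <2 x i32>
  %z = add <2 x i32> %y, %y
  ret <2 x i32> %z
}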

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll
    llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll
    llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
    llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Nov 14 16:21:41 2018
@@ -849,9 +849,12 @@ X86TargetLowering::X86TargetLowering(con
     // scalars) and extend in-register to a legal 128-bit vector type. For sext
     // loads these must work with a single scalar load.
     for (MVT VT : MVT::integer_vector_valuetypes()) {
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
+      if (!ExperimentalVectorWideningLegalization) {
+        // We don't want narrow result types here when widening.
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
+      }
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
@@ -860,6 +863,17 @@ X86TargetLowering::X86TargetLowering(con
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
     }
 
+    if (ExperimentalVectorWideningLegalization) {
+      // Explicitly code the list so we don't use narrow result types.
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8,  Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8,  Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8,  Custom);
+    }
+
     for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
@@ -979,17 +993,22 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
     }
 
-    for (MVT VT : MVT::integer_vector_valuetypes()) {
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+    if (!ExperimentalVectorWideningLegalization) {
+      // Avoid narrow result types when widening. The legal types are listed
+      // in the next loop.
+      for (MVT VT : MVT::integer_vector_valuetypes()) {
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+      }
     }
 
     // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
     for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
       setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8,  Legal);
       setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8,  Legal);
-      setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8,  Legal);
+      if (!ExperimentalVectorWideningLegalization)
+        setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8,  Legal);
       setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8,  Legal);
       setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
       setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);

Modified: llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll Wed Nov 14 16:21:41 2018
@@ -37,24 +37,20 @@ define void @mul_2xi8(i8* nocapture read
 ;
 ; X86-AVX-LABEL: mul_2xi8:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movzbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movzbl (%edx,%ecx), %edx
+; X86-AVX-NEXT:    movzwl (%edx,%ecx), %edx
 ; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X86-AVX-NEXT:    movzwl (%eax,%ecx), %eax
 ; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
-; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X86-AVX-NEXT:    vpmaddwd %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8:
@@ -75,15 +71,13 @@ define void @mul_2xi8(i8* nocapture read
 ; X64-AVX-LABEL: mul_2xi8:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
-; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-AVX-NEXT:    movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-AVX-NEXT:    vpmaddwd %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
 entry:
@@ -923,24 +917,20 @@ define void @mul_2xi8_sext(i8* nocapture
 ;
 ; X86-AVX-LABEL: mul_2xi8_sext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT:    movzwl (%edx,%ecx), %edx
 ; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movsbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movsbl (%eax,%ecx), %eax
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT:    movzwl (%eax,%ecx), %eax
 ; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpmovsxbd %xmm1, %xmm1
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_sext:
@@ -963,14 +953,12 @@ define void @mul_2xi8_sext(i8* nocapture
 ; X64-AVX-LABEL: mul_2xi8_sext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movsbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movsbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT:    movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-NEXT:    vpmovsxbd %xmm1, %xmm1
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
@@ -1023,24 +1011,20 @@ define void @mul_2xi8_sext_zext(i8* noca
 ;
 ; X86-AVX-LABEL: mul_2xi8_sext_zext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT:    movzwl (%edx,%ecx), %edx
 ; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT:    movzwl (%eax,%ecx), %eax
 ; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_sext_zext:
@@ -1064,14 +1048,12 @@ define void @mul_2xi8_sext_zext(i8* noca
 ; X64-AVX-LABEL: mul_2xi8_sext_zext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT:    movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
@@ -1118,24 +1100,18 @@ define void @mul_2xi16_sext(i8* nocaptur
 ;
 ; X86-AVX-LABEL: mul_2xi16_sext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movswl 2(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movswl (%eax,%ecx), %eax
-; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm1, %xmm1
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_sext:
@@ -1153,14 +1129,10 @@ define void @mul_2xi16_sext(i8* nocaptur
 ; X64-AVX-LABEL: mul_2xi16_sext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movswl 2(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movswl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm1, %xmm1
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
@@ -1213,22 +1185,18 @@ define void @mul_2xi16_sext_zext(i8* noc
 ;
 ; X86-AVX-LABEL: mul_2xi16_sext_zext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_sext_zext:
@@ -1252,10 +1220,8 @@ define void @mul_2xi16_sext_zext(i8* noc
 ; X64-AVX-LABEL: mul_2xi16_sext_zext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
@@ -1460,20 +1426,14 @@ define void @mul_2xi8_varconst1(i8* noca
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst1:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst1:
@@ -1491,10 +1451,9 @@ define void @mul_2xi8_varconst1(i8* noca
 ; X64-AVX-LABEL: mul_2xi8_varconst1:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1534,20 +1493,14 @@ define void @mul_2xi8_varconst2(i8* noca
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst2:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst2:
@@ -1566,10 +1519,9 @@ define void @mul_2xi8_varconst2(i8* noca
 ; X64-AVX-LABEL: mul_2xi8_varconst2:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1611,20 +1563,14 @@ define void @mul_2xi8_varconst3(i8* noca
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst3:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst3:
@@ -1645,10 +1591,9 @@ define void @mul_2xi8_varconst3(i8* noca
 ; X64-AVX-LABEL: mul_2xi8_varconst3:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1690,20 +1635,14 @@ define void @mul_2xi8_varconst4(i8* noca
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst4:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst4:
@@ -1724,10 +1663,9 @@ define void @mul_2xi8_varconst4(i8* noca
 ; X64-AVX-LABEL: mul_2xi8_varconst4:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1769,20 +1707,14 @@ define void @mul_2xi8_varconst5(i8* noca
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst5:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst5:
@@ -1803,10 +1735,9 @@ define void @mul_2xi8_varconst5(i8* noca
 ; X64-AVX-LABEL: mul_2xi8_varconst5:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1848,20 +1779,14 @@ define void @mul_2xi8_varconst6(i8* noca
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst6:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst6:
@@ -1882,10 +1807,9 @@ define void @mul_2xi8_varconst6(i8* noca
 ; X64-AVX-LABEL: mul_2xi8_varconst6:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1988,20 +1912,13 @@ define void @mul_2xi16_varconst2(i8* noc
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst2:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_varconst2:
@@ -2019,10 +1936,8 @@ define void @mul_2xi16_varconst2(i8* noc
 ; X64-AVX-LABEL: mul_2xi16_varconst2:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -2137,20 +2052,13 @@ define void @mul_2xi16_varconst4(i8* noc
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst4:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_varconst4:
@@ -2172,10 +2080,8 @@ define void @mul_2xi16_varconst4(i8* noc
 ; X64-AVX-LABEL: mul_2xi16_varconst4:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll Wed Nov 14 16:21:41 2018
@@ -3168,19 +3168,15 @@ define <2 x double> @sitofp_load_2i16_to
 ;
 ; SSE41-LABEL: sitofp_load_2i16_to_2f64:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movswl 2(%rdi), %eax
-; SSE41-NEXT:    movswl (%rdi), %ecx
-; SSE41-NEXT:    movd %ecx, %xmm0
-; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
 ; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_load_2i16_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movswl 2(%rdi), %eax
-; AVX-NEXT:    movswl (%rdi), %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %ld = load <2 x i16>, <2 x i16> *%a
@@ -3201,19 +3197,17 @@ define <2 x double> @sitofp_load_2i8_to_
 ;
 ; SSE41-LABEL: sitofp_load_2i8_to_2f64:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movsbl 1(%rdi), %eax
-; SSE41-NEXT:    movsbl (%rdi), %ecx
-; SSE41-NEXT:    movd %ecx, %xmm0
-; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; SSE41-NEXT:    movzwl (%rdi), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
 ; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_load_2i8_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movsbl 1(%rdi), %eax
-; AVX-NEXT:    movsbl (%rdi), %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    movzwl (%rdi), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %ld = load <2 x i8>, <2 x i8> *%a
@@ -3606,19 +3600,17 @@ define <2 x double> @uitofp_load_2i8_to_
 ;
 ; SSE41-LABEL: uitofp_load_2i8_to_2f64:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movzbl 1(%rdi), %eax
-; SSE41-NEXT:    movzbl (%rdi), %ecx
-; SSE41-NEXT:    movd %ecx, %xmm0
-; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; SSE41-NEXT:    movzwl (%rdi), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; SSE41-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: uitofp_load_2i8_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl 1(%rdi), %eax
-; AVX-NEXT:    movzbl (%rdi), %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    movzwl (%rdi), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %ld = load <2 x i8>, <2 x i8> *%a

Modified: llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll Wed Nov 14 16:21:41 2018
@@ -5062,29 +5062,26 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x
 ;
 ; SSE41-LABEL: sext_2i8_to_2i32:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movsbl 1(%rdi), %eax
-; SSE41-NEXT:    movsbl (%rdi), %ecx
-; SSE41-NEXT:    movd %ecx, %xmm0
-; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; SSE41-NEXT:    movzwl (%rdi), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
 ; SSE41-NEXT:    paddd %xmm0, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: sext_2i8_to_2i32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movsbl 1(%rdi), %eax
-; AVX-NEXT:    movsbl (%rdi), %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    movzwl (%rdi), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; X32-SSE41-LABEL: sext_2i8_to_2i32:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movsbl 1(%eax), %ecx
-; X32-SSE41-NEXT:    movsbl (%eax), %eax
+; X32-SSE41-NEXT:    movzwl (%eax), %eax
 ; X32-SSE41-NEXT:    movd %eax, %xmm0
-; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
+; X32-SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
 ; X32-SSE41-NEXT:    paddd %xmm0, %xmm0
 ; X32-SSE41-NEXT:    retl
   %x = load <2 x i8>, <2 x i8>* %addr, align 1

Modified: llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll Wed Nov 14 16:21:41 2018
@@ -2272,19 +2272,17 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x
 ;
 ; SSE41-LABEL: zext_2i8_to_2i32:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movzbl 1(%rdi), %eax
-; SSE41-NEXT:    movzbl (%rdi), %ecx
-; SSE41-NEXT:    movd %ecx, %xmm0
-; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; SSE41-NEXT:    movzwl (%rdi), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; SSE41-NEXT:    paddd %xmm0, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: zext_2i8_to_2i32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl 1(%rdi), %eax
-; AVX-NEXT:    movzbl (%rdi), %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    movzwl (%rdi), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %x = load <2 x i8>, <2 x i8>* %addr, align 1



