[llvm] r346916 - [X86] Don't mark SEXTLOADS with narrow types as Custom with -x86-experimental-vector-widening-legalization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 14 16:21:41 PST 2018
Author: ctopper
Date: Wed Nov 14 16:21:41 2018
New Revision: 346916
URL: http://llvm.org/viewvc/llvm-project?rev=346916&view=rev
Log:
[X86] Don't mark SEXTLOADS with narrow types as Custom with -x86-experimental-vector-widening-legalization.
The narrow types end up requesting widening, but generic legalization will end up scalarizing and using a build_vector to do the widening.
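
A rough IR sketch of the affected pattern, mirroring the sext_2i8_to_2i32 test updated below (the exact test body is an assumption reconstructed from the visible load line and the paddd/vpaddd in the checked assembly): before this change the narrow sextload was scalarized into two movsbl loads feeding a build_vector (movd + pinsrd); afterwards the widened lowering emits a single scalar movzwl load plus pmovsxbd, as the updated CHECK lines show.

define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
  %x = load <2 x i8>, <2 x i8>* %addr, align 1   ; narrow <2 x i8> load
  %y = sext <2 x i8> %x to <2 x i32>             ; folds into a sextload
  %z = add <2 x i32> %y, %y
  ret <2 x i32> %z
}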
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll
llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Nov 14 16:21:41 2018
@@ -849,9 +849,12 @@ X86TargetLowering::X86TargetLowering(con
// scalars) and extend in-register to a legal 128-bit vector type. For sext
// loads these must work with a single scalar load.
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
+ if (!ExperimentalVectorWideningLegalization) {
+ // We don't want narrow result types here when widening.
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
+ }
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
@@ -860,6 +863,17 @@ X86TargetLowering::X86TargetLowering(con
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
}
+ if (ExperimentalVectorWideningLegalization) {
+ // Explicitly code the list so we don't use narrow result types.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Custom);
+ }
+
for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -979,17 +993,22 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ if (!ExperimentalVectorWideningLegalization) {
+ // Avoid narrow result types when widening. The legal types are listed
+ // in the next loop.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ }
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
+ if (!ExperimentalVectorWideningLegalization)
+ setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
Modified: llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll Wed Nov 14 16:21:41 2018
@@ -37,24 +37,20 @@ define void @mul_2xi8(i8* nocapture read
;
; X86-AVX-LABEL: mul_2xi8:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movzbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT: movzbl (%edx,%ecx), %edx
+; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT: movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
-; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X86-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8:
@@ -75,15 +71,13 @@ define void @mul_2xi8(i8* nocapture read
; X64-AVX-LABEL: mul_2xi8:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -923,24 +917,20 @@ define void @mul_2xi8_sext(i8* nocapture
;
; X86-AVX-LABEL: mul_2xi8_sext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT: movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movsbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT: movsbl (%eax,%ecx), %eax
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT: vpmovsxbd %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_sext:
@@ -963,14 +953,12 @@ define void @mul_2xi8_sext(i8* nocapture
; X64-AVX-LABEL: mul_2xi8_sext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movsbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movsbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-NEXT: vpmovsxbd %xmm1, %xmm1
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
@@ -1023,24 +1011,20 @@ define void @mul_2xi8_sext_zext(i8* noca
;
; X86-AVX-LABEL: mul_2xi8_sext_zext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT: movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT: movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_sext_zext:
@@ -1064,14 +1048,12 @@ define void @mul_2xi8_sext_zext(i8* noca
; X64-AVX-LABEL: mul_2xi8_sext_zext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
@@ -1118,24 +1100,18 @@ define void @mul_2xi16_sext(i8* nocaptur
;
; X86-AVX-LABEL: mul_2xi16_sext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT: movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movswl 2(%eax,%ecx), %edx
-; X86-AVX-NEXT: movswl (%eax,%ecx), %eax
-; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_sext:
@@ -1153,14 +1129,10 @@ define void @mul_2xi16_sext(i8* nocaptur
; X64-AVX-LABEL: mul_2xi16_sext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movswl 2(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movswl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm1, %xmm1
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
@@ -1213,22 +1185,18 @@ define void @mul_2xi16_sext_zext(i8* noc
;
; X86-AVX-LABEL: mul_2xi16_sext_zext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT: movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_sext_zext:
@@ -1252,10 +1220,8 @@ define void @mul_2xi16_sext_zext(i8* noc
; X64-AVX-LABEL: mul_2xi16_sext_zext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
@@ -1460,20 +1426,14 @@ define void @mul_2xi8_varconst1(i8* noca
;
; X86-AVX-LABEL: mul_2xi8_varconst1:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst1:
@@ -1491,10 +1451,9 @@ define void @mul_2xi8_varconst1(i8* noca
; X64-AVX-LABEL: mul_2xi8_varconst1:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1534,20 +1493,14 @@ define void @mul_2xi8_varconst2(i8* noca
;
; X86-AVX-LABEL: mul_2xi8_varconst2:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst2:
@@ -1566,10 +1519,9 @@ define void @mul_2xi8_varconst2(i8* noca
; X64-AVX-LABEL: mul_2xi8_varconst2:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1611,20 +1563,14 @@ define void @mul_2xi8_varconst3(i8* noca
;
; X86-AVX-LABEL: mul_2xi8_varconst3:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst3:
@@ -1645,10 +1591,9 @@ define void @mul_2xi8_varconst3(i8* noca
; X64-AVX-LABEL: mul_2xi8_varconst3:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1690,20 +1635,14 @@ define void @mul_2xi8_varconst4(i8* noca
;
; X86-AVX-LABEL: mul_2xi8_varconst4:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst4:
@@ -1724,10 +1663,9 @@ define void @mul_2xi8_varconst4(i8* noca
; X64-AVX-LABEL: mul_2xi8_varconst4:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1769,20 +1707,14 @@ define void @mul_2xi8_varconst5(i8* noca
;
; X86-AVX-LABEL: mul_2xi8_varconst5:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst5:
@@ -1803,10 +1735,9 @@ define void @mul_2xi8_varconst5(i8* noca
; X64-AVX-LABEL: mul_2xi8_varconst5:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1848,20 +1779,14 @@ define void @mul_2xi8_varconst6(i8* noca
;
; X86-AVX-LABEL: mul_2xi8_varconst6:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst6:
@@ -1882,10 +1807,9 @@ define void @mul_2xi8_varconst6(i8* noca
; X64-AVX-LABEL: mul_2xi8_varconst6:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1988,20 +1912,13 @@ define void @mul_2xi16_varconst2(i8* noc
;
; X86-AVX-LABEL: mul_2xi16_varconst2:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT: movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst2:
@@ -2019,10 +1936,8 @@ define void @mul_2xi16_varconst2(i8* noc
; X64-AVX-LABEL: mul_2xi16_varconst2:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -2137,20 +2052,13 @@ define void @mul_2xi16_varconst4(i8* noc
;
; X86-AVX-LABEL: mul_2xi16_varconst4:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT: movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst4:
@@ -2172,10 +2080,8 @@ define void @mul_2xi16_varconst4(i8* noc
; X64-AVX-LABEL: mul_2xi16_varconst4:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll Wed Nov 14 16:21:41 2018
@@ -3168,19 +3168,15 @@ define <2 x double> @sitofp_load_2i16_to
;
; SSE41-LABEL: sitofp_load_2i16_to_2f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movswl 2(%rdi), %eax
-; SSE41-NEXT: movswl (%rdi), %ecx
-; SSE41-NEXT: movd %ecx, %xmm0
-; SSE41-NEXT: pinsrd $1, %eax, %xmm0
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sitofp_load_2i16_to_2f64:
; AVX: # %bb.0:
-; AVX-NEXT: movswl 2(%rdi), %eax
-; AVX-NEXT: movswl (%rdi), %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <2 x i16>, <2 x i16> *%a
@@ -3201,19 +3197,17 @@ define <2 x double> @sitofp_load_2i8_to_
;
; SSE41-LABEL: sitofp_load_2i8_to_2f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movsbl 1(%rdi), %eax
-; SSE41-NEXT: movsbl (%rdi), %ecx
-; SSE41-NEXT: movd %ecx, %xmm0
-; SSE41-NEXT: pinsrd $1, %eax, %xmm0
+; SSE41-NEXT: movzwl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sitofp_load_2i8_to_2f64:
; AVX: # %bb.0:
-; AVX-NEXT: movsbl 1(%rdi), %eax
-; AVX-NEXT: movsbl (%rdi), %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <2 x i8>, <2 x i8> *%a
@@ -3606,19 +3600,17 @@ define <2 x double> @uitofp_load_2i8_to_
;
; SSE41-LABEL: uitofp_load_2i8_to_2f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movzbl 1(%rdi), %eax
-; SSE41-NEXT: movzbl (%rdi), %ecx
-; SSE41-NEXT: movd %ecx, %xmm0
-; SSE41-NEXT: pinsrd $1, %eax, %xmm0
+; SSE41-NEXT: movzwl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: uitofp_load_2i8_to_2f64:
; AVX: # %bb.0:
-; AVX-NEXT: movzbl 1(%rdi), %eax
-; AVX-NEXT: movzbl (%rdi), %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <2 x i8>, <2 x i8> *%a
Modified: llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll Wed Nov 14 16:21:41 2018
@@ -5062,29 +5062,26 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x
;
; SSE41-LABEL: sext_2i8_to_2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movsbl 1(%rdi), %eax
-; SSE41-NEXT: movsbl (%rdi), %ecx
-; SSE41-NEXT: movd %ecx, %xmm0
-; SSE41-NEXT: pinsrd $1, %eax, %xmm0
+; SSE41-NEXT: movzwl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; SSE41-NEXT: paddd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_2i8_to_2i32:
; AVX: # %bb.0:
-; AVX-NEXT: movsbl 1(%rdi), %eax
-; AVX-NEXT: movsbl (%rdi), %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: sext_2i8_to_2i32:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: movsbl 1(%eax), %ecx
-; X32-SSE41-NEXT: movsbl (%eax), %eax
+; X32-SSE41-NEXT: movzwl (%eax), %eax
; X32-SSE41-NEXT: movd %eax, %xmm0
-; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
+; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; X32-SSE41-NEXT: paddd %xmm0, %xmm0
; X32-SSE41-NEXT: retl
%x = load <2 x i8>, <2 x i8>* %addr, align 1
Modified: llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll?rev=346916&r1=346915&r2=346916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll Wed Nov 14 16:21:41 2018
@@ -2272,19 +2272,17 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x
;
; SSE41-LABEL: zext_2i8_to_2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movzbl 1(%rdi), %eax
-; SSE41-NEXT: movzbl (%rdi), %ecx
-; SSE41-NEXT: movd %ecx, %xmm0
-; SSE41-NEXT: pinsrd $1, %eax, %xmm0
+; SSE41-NEXT: movzwl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: paddd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: zext_2i8_to_2i32:
; AVX: # %bb.0:
-; AVX-NEXT: movzbl 1(%rdi), %eax
-; AVX-NEXT: movzbl (%rdi), %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%x = load <2 x i8>, <2 x i8>* %addr, align 1