[llvm-branch-commits] [llvm-branch] r322679 - Merging r322272:
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 17 08:18:37 PST 2018
Author: hans
Date: Wed Jan 17 08:18:37 2018
New Revision: 322679
URL: http://llvm.org/viewvc/llvm-project?rev=322679&view=rev
Log:
Merging r322272:
------------------------------------------------------------------------
r322272 | zvi | 2018-01-11 04:26:52 -0800 (Thu, 11 Jan 2018) | 15 lines
X86: Fix LowerBUILD_VECTORAsVariablePermute for the case where Src is smaller than Indices
Summary:
As RKSimon suggested in PR35820, when Src is smaller in bit-size than
Indices, we need to widen Src to avoid a type mismatch.
Fixes PR35820.
Reviewers: RKSimon, craig.topper
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D41865
------------------------------------------------------------------------
Modified:
llvm/branches/release_60/ (props changed)
llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/release_60/test/CodeGen/X86/var-permute-256.ll
Propchange: llvm/branches/release_60/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Jan 17 08:18:37 2018
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,321789,321791,321862,321870,321872,321980,321991,321993-321994,322056,322103,322473,322623
+/llvm/trunk:155241,321789,321791,321862,321870,321872,321980,321991,321993-321994,322056,322103,322272,322473,322623
Modified: llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp?rev=322679&r1=322678&r2=322679&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp Wed Jan 17 08:18:37 2018
@@ -7893,6 +7893,11 @@ LowerBUILD_VECTORAsVariablePermute(SDVal
IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
+ if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) {
+ SrcVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
+ SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
+ }
return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV,
SDLoc(V), VT, IndicesVec, SrcVec);
}
Modified: llvm/branches/release_60/test/CodeGen/X86/var-permute-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/var-permute-256.ll?rev=322679&r1=322678&r2=322679&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/var-permute-256.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/var-permute-256.ll Wed Jan 17 08:18:37 2018
@@ -1277,3 +1277,183 @@ define <8 x float> @var_shuffle_v8f32(<8
%ret7 = insertelement <8 x float> %ret6, float %v7, i32 7
ret <8 x float> %ret7
}
+
+define <8 x i32> @pr35820(<4 x i32> %v, <8 x i32> %indices) unnamed_addr nounwind {
+; AVX1-LABEL: pr35820:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm1, %r8
+; AVX1-NEXT: movq %r8, %r10
+; AVX1-NEXT: shrq $30, %r10
+; AVX1-NEXT: vmovq %xmm1, %r9
+; AVX1-NEXT: movq %r9, %rsi
+; AVX1-NEXT: shrq $30, %rsi
+; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andl $3, %r9d
+; AVX1-NEXT: andl $12, %esi
+; AVX1-NEXT: andl $3, %r8d
+; AVX1-NEXT: andl $12, %r10d
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rdi
+; AVX1-NEXT: shrq $30, %rdi
+; AVX1-NEXT: vmovq %xmm0, %rcx
+; AVX1-NEXT: movq %rcx, %rdx
+; AVX1-NEXT: shrq $30, %rdx
+; AVX1-NEXT: andl $3, %ecx
+; AVX1-NEXT: andl $12, %edx
+; AVX1-NEXT: andl $3, %eax
+; AVX1-NEXT: andl $12, %edi
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rdx), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrd $2, -24(%rsp,%rax,4), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrd $3, -24(%rsp,%rdi), %xmm0, %xmm0
+; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rsi), %xmm1, %xmm1
+; AVX1-NEXT: vpinsrd $2, -24(%rsp,%r8,4), %xmm1, %xmm1
+; AVX1-NEXT: vpinsrd $3, -24(%rsp,%r10), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; INT256-LABEL: pr35820:
+; INT256: # %bb.0: # %entry
+; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
+; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; INT256-NEXT: retq
+entry:
+ %tmp1 = extractelement <8 x i32> %indices, i32 0
+ %vecext2.8 = extractelement <4 x i32> %v, i32 %tmp1
+ %tmp2 = extractelement <8 x i32> %indices, i32 1
+ %vecext2.9 = extractelement <4 x i32> %v, i32 %tmp2
+ %tmp3 = extractelement <8 x i32> %indices, i32 2
+ %vecext2.10 = extractelement <4 x i32> %v, i32 %tmp3
+ %tmp4 = extractelement <8 x i32> %indices, i32 3
+ %vecext2.11 = extractelement <4 x i32> %v, i32 %tmp4
+ %tmp5 = extractelement <8 x i32> %indices, i32 4
+ %vecext2.12 = extractelement <4 x i32> %v, i32 %tmp5
+ %tmp6 = extractelement <8 x i32> %indices, i32 5
+ %vecext2.13 = extractelement <4 x i32> %v, i32 %tmp6
+ %tmp7 = extractelement <8 x i32> %indices, i32 6
+ %vecext2.14 = extractelement <4 x i32> %v, i32 %tmp7
+ %tmp8 = extractelement <8 x i32> %indices, i32 7
+ %vecext2.15 = extractelement <4 x i32> %v, i32 %tmp8
+ %tmp9 = insertelement <8 x i32> undef, i32 %vecext2.8, i32 0
+ %tmp10 = insertelement <8 x i32> %tmp9, i32 %vecext2.9, i32 1
+ %tmp11 = insertelement <8 x i32> %tmp10, i32 %vecext2.10, i32 2
+ %tmp12 = insertelement <8 x i32> %tmp11, i32 %vecext2.11, i32 3
+ %tmp13 = insertelement <8 x i32> %tmp12, i32 %vecext2.12, i32 4
+ %tmp14 = insertelement <8 x i32> %tmp13, i32 %vecext2.13, i32 5
+ %tmp15 = insertelement <8 x i32> %tmp14, i32 %vecext2.14, i32 6
+ %tmp16 = insertelement <8 x i32> %tmp15, i32 %vecext2.15, i32 7
+ ret <8 x i32> %tmp16
+}
+
+define <8 x float> @pr35820_float(<4 x float> %v, <8 x i32> %indices) unnamed_addr nounwind {
+; AVX1-LABEL: pr35820_float:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpextrq $1, %xmm1, %r8
+; AVX1-NEXT: movq %r8, %r10
+; AVX1-NEXT: shrq $30, %r10
+; AVX1-NEXT: vmovq %xmm1, %r9
+; AVX1-NEXT: movq %r9, %rdx
+; AVX1-NEXT: shrq $30, %rdx
+; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: andl $3, %r9d
+; AVX1-NEXT: andl $12, %edx
+; AVX1-NEXT: andl $3, %r8d
+; AVX1-NEXT: andl $12, %r10d
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rdi
+; AVX1-NEXT: shrq $30, %rdi
+; AVX1-NEXT: vmovq %xmm0, %rcx
+; AVX1-NEXT: movq %rcx, %rsi
+; AVX1-NEXT: shrq $30, %rsi
+; AVX1-NEXT: andl $3, %ecx
+; AVX1-NEXT: andl $12, %esi
+; AVX1-NEXT: andl $3, %eax
+; AVX1-NEXT: andl $12, %edi
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; INT256-LABEL: pr35820_float:
+; INT256: # %bb.0: # %entry
+; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
+; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; INT256-NEXT: retq
+entry:
+ %tmp1 = extractelement <8 x i32> %indices, i32 0
+ %vecext2.8 = extractelement <4 x float> %v, i32 %tmp1
+ %tmp2 = extractelement <8 x i32> %indices, i32 1
+ %vecext2.9 = extractelement <4 x float> %v, i32 %tmp2
+ %tmp3 = extractelement <8 x i32> %indices, i32 2
+ %vecext2.10 = extractelement <4 x float> %v, i32 %tmp3
+ %tmp4 = extractelement <8 x i32> %indices, i32 3
+ %vecext2.11 = extractelement <4 x float> %v, i32 %tmp4
+ %tmp5 = extractelement <8 x i32> %indices, i32 4
+ %vecext2.12 = extractelement <4 x float> %v, i32 %tmp5
+ %tmp6 = extractelement <8 x i32> %indices, i32 5
+ %vecext2.13 = extractelement <4 x float> %v, i32 %tmp6
+ %tmp7 = extractelement <8 x i32> %indices, i32 6
+ %vecext2.14 = extractelement <4 x float> %v, i32 %tmp7
+ %tmp8 = extractelement <8 x i32> %indices, i32 7
+ %vecext2.15 = extractelement <4 x float> %v, i32 %tmp8
+ %tmp9 = insertelement <8 x float> undef, float %vecext2.8, i32 0
+ %tmp10 = insertelement <8 x float> %tmp9, float %vecext2.9, i32 1
+ %tmp11 = insertelement <8 x float> %tmp10, float %vecext2.10, i32 2
+ %tmp12 = insertelement <8 x float> %tmp11, float %vecext2.11, i32 3
+ %tmp13 = insertelement <8 x float> %tmp12, float %vecext2.12, i32 4
+ %tmp14 = insertelement <8 x float> %tmp13, float %vecext2.13, i32 5
+ %tmp15 = insertelement <8 x float> %tmp14, float %vecext2.14, i32 6
+ %tmp16 = insertelement <8 x float> %tmp15, float %vecext2.15, i32 7
+ ret <8 x float> %tmp16
+}
+
+define <4 x i32> @big_source(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind {
+; AVX-LABEL: big_source:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rbp
+; AVX-NEXT: movq %rsp, %rbp
+; AVX-NEXT: andq $-32, %rsp
+; AVX-NEXT: subq $64, %rsp
+; AVX-NEXT: vmovq %xmm1, %rax
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shrq $30, %rcx
+; AVX-NEXT: andl $28, %ecx
+; AVX-NEXT: vpextrq $1, %xmm1, %rdx
+; AVX-NEXT: movq %rdx, %rsi
+; AVX-NEXT: sarq $32, %rsi
+; AVX-NEXT: andl $7, %eax
+; AVX-NEXT: andl $7, %edx
+; AVX-NEXT: vmovaps %ymm0, (%rsp)
+; AVX-NEXT: andl $7, %esi
+; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vpinsrd $1, (%rsp,%rcx), %xmm0, %xmm0
+; AVX-NEXT: vpinsrd $2, (%rsp,%rdx,4), %xmm0, %xmm0
+; AVX-NEXT: vpinsrd $3, (%rsp,%rsi,4), %xmm0, %xmm0
+; AVX-NEXT: movq %rbp, %rsp
+; AVX-NEXT: popq %rbp
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+entry:
+ %tmp1 = extractelement <4 x i32> %indices, i32 0
+ %vecext2.8 = extractelement <8 x i32> %v, i32 %tmp1
+ %tmp2 = extractelement <4 x i32> %indices, i32 1
+ %vecext2.9 = extractelement <8 x i32> %v, i32 %tmp2
+ %tmp3 = extractelement <4 x i32> %indices, i32 2
+ %vecext2.10 = extractelement <8 x i32> %v, i32 %tmp3
+ %tmp4 = extractelement <4 x i32> %indices, i32 3
+ %vecext2.11 = extractelement <8 x i32> %v, i32 %tmp4
+ %tmp9 = insertelement <4 x i32> undef, i32 %vecext2.8, i32 0
+ %tmp10 = insertelement <4 x i32> %tmp9, i32 %vecext2.9, i32 1
+ %tmp11 = insertelement <4 x i32> %tmp10, i32 %vecext2.10, i32 2
+ %tmp12 = insertelement <4 x i32> %tmp11, i32 %vecext2.11, i32 3
+ ret <4 x i32> %tmp12
+}
More information about the llvm-branch-commits
mailing list