[llvm] r327244 - [X86][AVX] createVariablePermute - widen permutes for cases where the source vector is wider than the destination type
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 11 10:00:47 PDT 2018
Author: rksimon
Date: Sun Mar 11 10:00:46 2018
New Revision: 327244
URL: http://llvm.org/viewvc/llvm-project?rev=327244&view=rev
Log:
[X86][AVX] createVariablePermute - widen permutes for cases where the source vector is wider than the destination type
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/var-permute-128.ll
llvm/trunk/test/CodeGen/X86/var-permute-256.ll
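Previously createVariablePermute simply bailed out (returning SDValue()) whenever the source vector was wider than the destination type. With this patch, if the source width is a whole multiple of the destination width, the permute is performed at the wider width instead: the index vector is widened to the source's element count, the permute is created at that width, and the low subvector of the result is extracted. As a minimal scalar model of that strategy (standalone C++ with illustrative names, not LLVM's API):

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Variable permute: Dst[i] = Src[Idx[i] % Src.size()], matching the
  // index masking (e.g. 'and 31' for a 32-byte source) in the tests below.
  static std::vector<uint8_t> permute(const std::vector<uint8_t> &Src,
                                      const std::vector<uint8_t> &Idx) {
    std::vector<uint8_t> Dst(Idx.size());
    for (size_t I = 0; I != Idx.size(); ++I)
      Dst[I] = Src[Idx[I] % Src.size()];
    return Dst;
  }

  int main() {
    // A 32-byte source but only 16 indices: rather than giving up, widen
    // the index vector to 32 lanes (zero-filling the upper half), run a
    // full 32-lane permute, and keep only the low 16 results.
    std::vector<uint8_t> Src(32);
    for (size_t I = 0; I != 32; ++I)
      Src[I] = uint8_t(100 + I);
    std::vector<uint8_t> Idx = {31, 0, 17, 2, 5, 5, 8, 30,
                                1,  9, 16, 3, 7, 11, 13, 28};

    std::vector<uint8_t> WideIdx = Idx;
    WideIdx.resize(32, 0);  // Zero-fill; these lanes are discarded below.
    std::vector<uint8_t> Wide = permute(Src, WideIdx);
    Wide.resize(16);        // Extract the low subvector.

    for (uint8_t V : Wide)
      printf("%u ", V);
    printf("\n");
  }

Zero-filling the widened index lanes is safe because those lanes are discarded by the final subvector extraction.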
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=327244&r1=327243&r2=327244&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Mar 11 10:00:46 2018
@@ -7941,6 +7941,7 @@ SDValue createVariablePermute(MVT VT, SD
MVT ShuffleVT = VT;
EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
unsigned NumElts = VT.getVectorNumElements();
+ unsigned SizeInBits = VT.getSizeInBits();
// Adjust IndicesVec to match VT size.
assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts &&
@@ -7950,11 +7951,24 @@ SDValue createVariablePermute(MVT VT, SD
NumElts * VT.getScalarSizeInBits());
IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
- // Adjust SrcVec to match VT type.
- if (SrcVec.getValueSizeInBits() > VT.getSizeInBits())
- return SDValue();
- else if (SrcVec.getValueSizeInBits() < VT.getSizeInBits())
- SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
+ // Handle SrcVec that doesn't match the VT size.
+ if (SrcVec.getValueSizeInBits() != SizeInBits) {
+ if ((SrcVec.getValueSizeInBits() % SizeInBits) == 0) {
+ // Handle larger SrcVec by treating it as a larger permute.
+ unsigned Scale = SrcVec.getValueSizeInBits() / SizeInBits;
+ VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts);
+ IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
+ IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
+ Subtarget, DAG, SDLoc(IndicesVec));
+ return extractSubVector(
+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
+ DAG, DL, SizeInBits);
+ } else if (SrcVec.getValueSizeInBits() < SizeInBits) {
+ // Widen smaller SrcVec to match VT.
+ SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
+ } else
+ return SDValue();
+ }
auto ScaleIndices = [&DAG](SDValue Idx, uint64_t Scale) {
assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale");
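Note that the recursive createVariablePermute call cannot widen a second time: VT has already been scaled so that VT.getSizeInBits() equals SrcVec's width, so the recursion takes the size-matching path, and the extractSubVector at offset 0 then narrows the wide permute result back to the original SizeInBits.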
Modified: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-128.ll?rev=327244&r1=327243&r2=327244&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll Sun Mar 11 10:00:46 2018
@@ -566,66 +566,72 @@ define <16 x i8> @var_shuffle_v16i8_from
; SSE41-NEXT: popq %rbp
; SSE41-NEXT: retq
;
-; AVX-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
-; AVX: # %bb.0:
-; AVX-NEXT: pushq %rbp
-; AVX-NEXT: movq %rsp, %rbp
-; AVX-NEXT: andq $-32, %rsp
-; AVX-NEXT: subq $64, %rsp
-; AVX-NEXT: vpextrb $0, %xmm1, %eax
-; AVX-NEXT: vmovaps %ymm0, (%rsp)
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: movzbl (%rsp,%rax), %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpextrb $1, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $2, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $3, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $4, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $5, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $6, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $7, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $8, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $9, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $10, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $11, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $12, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $13, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $14, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: vpextrb $15, %xmm1, %eax
-; AVX-NEXT: andl $31, %eax
-; AVX-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT: movq %rbp, %rsp
-; AVX-NEXT: popq %rbp
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; XOP-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
+; XOP: # %bb.0:
+; XOP-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOP-NEXT: vpperm %xmm1, %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vzeroupper
+; XOP-NEXT: retq
+;
+; AVX1-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtb {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
+; AVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
+; AVX512F-NEXT: vpshufb %ymm1, %ymm2, %ymm2
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
+; AVX512VL-NEXT: vpshufb %ymm1, %ymm2, %ymm2
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm3
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; VBMI-LABEL: var_shuffle_v16i8_from_v32i8_v16i8:
+; VBMI: # %bb.0:
+; VBMI-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; VBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; VBMI-NEXT: vzeroupper
+; VBMI-NEXT: retq
%index0 = extractelement <16 x i8> %indices, i32 0
%index1 = extractelement <16 x i8> %indices, i32 1
%index2 = extractelement <16 x i8> %indices, i32 2
Modified: llvm/trunk/test/CodeGen/X86/var-permute-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-256.ll?rev=327244&r1=327243&r2=327244&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-256.ll Sun Mar 11 10:00:46 2018
@@ -1655,29 +1655,40 @@ entry:
}
define <4 x i32> @var_shuffle_v4i32_from_v8i32(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind {
-; AVX-LABEL: var_shuffle_v4i32_from_v8i32:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: pushq %rbp
-; AVX-NEXT: movq %rsp, %rbp
-; AVX-NEXT: andq $-32, %rsp
-; AVX-NEXT: subq $64, %rsp
-; AVX-NEXT: vmovd %xmm1, %eax
-; AVX-NEXT: vmovaps %ymm0, (%rsp)
-; AVX-NEXT: andl $7, %eax
-; AVX-NEXT: vpextrd $1, %xmm1, %ecx
-; AVX-NEXT: andl $7, %ecx
-; AVX-NEXT: vpextrd $2, %xmm1, %edx
-; AVX-NEXT: andl $7, %edx
-; AVX-NEXT: vpextrd $3, %xmm1, %esi
-; AVX-NEXT: andl $7, %esi
-; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vpinsrd $1, (%rsp,%rcx,4), %xmm0, %xmm0
-; AVX-NEXT: vpinsrd $2, (%rsp,%rdx,4), %xmm0, %xmm0
-; AVX-NEXT: vpinsrd $3, (%rsp,%rsi,4), %xmm0, %xmm0
-; AVX-NEXT: movq %rbp, %rsp
-; AVX-NEXT: popq %rbp
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; XOP-LABEL: var_shuffle_v4i32_from_v8i32:
+; XOP: # %bb.0: # %entry
+; XOP-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
+; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; XOP-NEXT: vpermil2ps $0, %ymm1, %ymm2, %ymm0, %ymm0
+; XOP-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; XOP-NEXT: vzeroupper
+; XOP-NEXT: retq
+;
+; AVX1-LABEL: var_shuffle_v4i32_from_v8i32:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
+; AVX1-NEXT: vpermilps %ymm1, %ymm2, %ymm2
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} ymm3 = [3,3,3,3,3,3,3,3]
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpcmpgtd %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; INT256-LABEL: var_shuffle_v4i32_from_v8i32:
+; INT256: # %bb.0: # %entry
+; INT256-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; INT256-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; INT256-NEXT: vzeroupper
+; INT256-NEXT: retq
entry:
%tmp1 = extractelement <4 x i32> %indices, i32 0
%vecext2.8 = extractelement <8 x i32> %v, i32 %tmp1