[llvm] r323223 - [X86][SSE] LowerBUILD_VECTORAsVariablePermute - extract subvector from oversized index vectors

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 23 09:02:15 PST 2018


Author: rksimon
Date: Tue Jan 23 09:02:15 2018
New Revision: 323223

URL: http://llvm.org/viewvc/llvm-project?rev=323223&view=rev
Log:
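[X86][SSE] LowerBUILD_VECTORAsVariablePermute - extract subvector from oversized index vectors

When the index vector has more elements than the result type, the lowering now extracts the low subvector of the indices (starting at element 0) instead of bailing out; index vectors with fewer elements than the result are still rejected. In the updated test, the v16i8 shuffle with v32i8 indices now lowers to a single vpshufb on AVX.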

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/var-permute-128.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=323223&r1=323222&r2=323223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 23 09:02:15 2018
@@ -7913,21 +7913,25 @@ LowerBUILD_VECTORAsVariablePermute(SDVal
     else if (IndicesVec != ExtractedIndex.getOperand(0))
       return SDValue();
 
-    // The index vector must be the same size as the destination.
-    if (IndicesVec.getValueType().getVectorNumElements() != E)
-      return SDValue();
-
     auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1));
     if (!PermIdx || PermIdx->getZExtValue() != Idx)
       return SDValue();
   }
 
+  unsigned NumElts = VT.getVectorNumElements();
+  if (IndicesVec.getValueType().getVectorNumElements() < NumElts)
+    return SDValue();
+  else if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
+    IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
+                                  NumElts * VT.getScalarSizeInBits());
+  }
+
   MVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
   IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
 
-  if (SrcVec.getValueSizeInBits() > IndicesVT.getSizeInBits())
+  if (SrcVec.getValueSizeInBits() > VT.getSizeInBits())
     return SDValue();
-  else if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) {
+  else if (SrcVec.getValueSizeInBits() < VT.getSizeInBits()) {
     SrcVec =
         DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
                     SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));

Modified: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-128.ll?rev=323223&r1=323222&r2=323223&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll Tue Jan 23 09:02:15 2018
@@ -362,56 +362,7 @@ define <16 x i8> @var_shuffle_v16i8_from
 ;
 ; AVX-LABEL: var_shuffle_v16i8_from_v16i8_v32i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpextrb $0, %xmm1, %eax
-; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    movzbl -24(%rsp,%rax), %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
   %index0 = extractelement <32 x i8> %indices, i32 0




More information about the llvm-commits mailing list