[llvm] r327213 - [X86][XOP] createVariablePermute - use VPPERM for v32i8 variable permutes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 10 08:51:45 PST 2018


Author: rksimon
Date: Sat Mar 10 08:51:45 2018
New Revision: 327213

URL: http://llvm.org/viewvc/llvm-project?rev=327213&view=rev
Log:
[X86][XOP] createVariablePermute - use VPPERM for v32i8 variable permutes
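
The previous lowering scalarized this permute through the stack, extracting
and reinserting all 32 bytes one at a time (the removed sequence is visible in
the test diff below). VPPERM selects each destination byte from the 32-byte
concatenation of its two 128-bit source operands via the low 5 bits of the
corresponding selector byte, so a v32i8 variable permute decomposes into two
VPPERMs: one per 128-bit half of the index vector, both reading the same pair
of source halves. A minimal C sketch of that decomposition using the XOP/AVX
intrinsics (the function name mirrors the test below; this is an illustration
of the idea, not the committed code):

    #include <x86intrin.h>  // build with -mxop; XOP-capable CPUs also have AVX

    // Sketch: assumes the top three bits of every index byte are clear
    // (VPPERM interprets selector bits [7:5] as an operation field).
    static __m256i var_shuffle_v32i8(__m256i v, __m256i indices) {
      __m128i LoSrc = _mm256_castsi256_si128(v);
      __m128i HiSrc = _mm256_extractf128_si256(v, 1);
      __m128i LoIdx = _mm256_castsi256_si128(indices);
      __m128i HiIdx = _mm256_extractf128_si256(indices, 1);
      // Each VPPERM picks bytes 0..31 from the concatenation (LoSrc, HiSrc),
      // so both halves of the result can index the full 32-byte source.
      __m128i Lo = _mm_perm_epi8(LoSrc, HiSrc, LoIdx);
      __m128i Hi = _mm_perm_epi8(LoSrc, HiSrc, HiIdx);
      return _mm256_insertf128_si256(_mm256_castsi128_si256(Lo), Hi, 1);
    }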

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/var-permute-256.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=327213&r1=327212&r2=327213&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 10 08:51:45 2018
@@ -7972,6 +7972,16 @@ SDValue createVariablePermute(MVT VT, SD
   case MVT::v32i8:
     if (Subtarget.hasVLX() && Subtarget.hasVBMI())
       Opcode = X86ISD::VPERMV;
+    else if (Subtarget.hasXOP()) {
+      SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL);
+      SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL);
+      SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL);
+      SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL);
+      return DAG.getNode(
+          ISD::CONCAT_VECTORS, DL, VT,
+          DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, LoIdx),
+          DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, HiIdx));
+    }
     break;
   case MVT::v16i16:
     if (Subtarget.hasVLX() && Subtarget.hasBWI())

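Note that only the index half changes between the two VPPERM nodes; both read
from the same LoSrc/HiSrc pair. In the v32i8-from-v16i8 test further down, the
source is only 16 bytes, so the same xmm register is passed as both VPPERM
source operands.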
Modified: llvm/trunk/test/CodeGen/X86/var-permute-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-256.ll?rev=327213&r1=327212&r2=327213&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-256.ll Sat Mar 10 08:51:45 2018
@@ -673,129 +673,11 @@ define <16 x i16> @var_shuffle_v16i16(<1
 define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
 ; XOP-LABEL: var_shuffle_v32i8:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    pushq %rbp
-; XOP-NEXT:    movq %rsp, %rbp
-; XOP-NEXT:    andq $-32, %rsp
-; XOP-NEXT:    subq $64, %rsp
 ; XOP-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; XOP-NEXT:    vpextrb $0, %xmm2, %eax
-; XOP-NEXT:    vmovaps %ymm0, (%rsp)
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vmovd %eax, %xmm0
-; XOP-NEXT:    vpextrb $1, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $2, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $3, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $4, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $5, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $6, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $7, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $8, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $9, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $10, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $11, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $12, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $13, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $14, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $15, %xmm2, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $0, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vmovd %eax, %xmm2
-; XOP-NEXT:    vpextrb $1, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $2, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $3, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $4, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $5, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $6, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $7, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $8, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $9, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $10, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $11, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $12, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $13, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $14, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $15, %xmm1, %eax
-; XOP-NEXT:    andl $31, %eax
-; XOP-NEXT:    movzbl (%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
-; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; XOP-NEXT:    movq %rbp, %rsp
-; XOP-NEXT:    popq %rbp
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; XOP-NEXT:    vpperm %xmm2, %xmm3, %xmm0, %xmm2
+; XOP-NEXT:    vpperm %xmm1, %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; XOP-NEXT:    retq
 ;
 ; AVX1-LABEL: var_shuffle_v32i8:
@@ -2376,122 +2258,9 @@ define <32 x i8> @var_shuffle_v32i8_from
 ; XOP-LABEL: var_shuffle_v32i8_from_v16i8:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; XOP-NEXT:    vpextrb $0, %xmm2, %eax
-; XOP-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vmovd %eax, %xmm0
-; XOP-NEXT:    vpextrb $1, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $2, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $3, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $4, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $5, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $6, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $7, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $8, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $9, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $10, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $11, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $12, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $13, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $14, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $15, %xmm2, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
-; XOP-NEXT:    vpextrb $0, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vmovd %eax, %xmm2
-; XOP-NEXT:    vpextrb $1, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $1, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $2, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $2, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $3, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $3, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $4, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $4, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $5, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $5, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $6, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $6, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $7, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $7, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $8, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $8, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $9, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $9, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $10, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $10, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $11, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $11, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $12, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $12, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $13, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $13, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $14, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    vpinsrb $14, -24(%rsp,%rax), %xmm2, %xmm2
-; XOP-NEXT:    vpextrb $15, %xmm1, %eax
-; XOP-NEXT:    andl $15, %eax
-; XOP-NEXT:    movzbl -24(%rsp,%rax), %eax
-; XOP-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm1
-; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOP-NEXT:    vpperm %xmm2, %xmm0, %xmm0, %xmm2
+; XOP-NEXT:    vpperm %xmm1, %xmm0, %xmm0, %xmm0
+; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; XOP-NEXT:    retq
 ;
 ; AVX1-LABEL: var_shuffle_v32i8_from_v16i8:
