[llvm] r295718 - [AVX512] Fix EXTRACT_VECTOR_ELT for v2i1/v4i1/v32i1/v64i1 with variable index.

Igor Breger via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 21 06:01:26 PST 2017


Author: ibreger
Date: Tue Feb 21 08:01:25 2017
New Revision: 295718

URL: http://llvm.org/viewvc/llvm-project?rev=295718&view=rev
Log:
[AVX512] Fix EXTRACT_VECTOR_ELT for v2i1/v4i1/v32i1/v64i1 with variable index.

Differential Revision: https://reviews.llvm.org/D30189


Added:
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract_i1.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=295718&r1=295717&r2=295718&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Feb 21 08:01:25 2017
@@ -13732,10 +13732,14 @@ X86TargetLowering::ExtractBitFromMaskVec
          "Unexpected vector type in ExtractBitFromMaskVector");
 
   // variable index can't be handled in mask registers,
-  // extend vector to VR512
+  // extend vector to VR512/128
   if (!isa<ConstantSDNode>(Idx)) {
-    MVT ExtVT = (VecVT == MVT::v8i1 ?  MVT::v8i64 : MVT::v16i32);
-    SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
+    unsigned NumElts = VecVT.getVectorNumElements();
+    // Extending v8i1/v16i1 to 512-bit get better performance on KNL
+    // than extending to 128/256bit.
+    unsigned VecSize = (NumElts <= 4 ? 128 : 512);
+    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
+    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                               ExtVT.getVectorElementType(), Ext, Idx);
     return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=295718&r1=295717&r2=295718&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Tue Feb 21 08:01:25 2017
@@ -2209,3 +2209,226 @@ define i8 @test_extractelement_variable_
   %t2 = extractelement <64 x i8> %t1, i8 %i
   ret i8 %t2
 }
+
+define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) {
+; KNL-LABEL: test_extractelement_varible_v2i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; KNL-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; KNL-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; KNL-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; KNL-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    movl -24(%rsp,%rdi,8), %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_varible_v2i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
+; SKX-NEXT:    vpmovm2q %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    movl -24(%rsp,%rdi,8), %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    retq
+  %t1 = icmp ugt <2 x i64> %a, %b
+  %t2 = extractelement <2 x i1> %t1, i32 %index
+  %res = zext i1 %t2 to i8
+  ret i8 %res
+}
+
+define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) {
+; KNL-LABEL: test_extractelement_varible_v4i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
+; KNL-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; KNL-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; KNL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; KNL-NEXT:    andl $3, %edi
+; KNL-NEXT:    movl -24(%rsp,%rdi,4), %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_varible_v4i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; SKX-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; SKX-NEXT:    andl $3, %edi
+; SKX-NEXT:    movl -24(%rsp,%rdi,4), %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    retq
+  %t1 = icmp ugt <4 x i32> %a, %b
+  %t2 = extractelement <4 x i1> %t1, i32 %index
+  %res = zext i1 %t2 to i8
+  ret i8 %res
+}
+
+define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) {
+; KNL-LABEL: test_extractelement_varible_v8i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    pushq %rbp
+; KNL-NEXT:  Lcfi42:
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:  Lcfi43:
+; KNL-NEXT:    .cfi_offset %rbp, -16
+; KNL-NEXT:    movq %rsp, %rbp
+; KNL-NEXT:  Lcfi44:
+; KNL-NEXT:    .cfi_def_cfa_register %rbp
+; KNL-NEXT:    andq $-64, %rsp
+; KNL-NEXT:    subq $128, %rsp
+; KNL-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1
+; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vmovdqa64 %zmm0, (%rsp)
+; KNL-NEXT:    andl $7, %edi
+; KNL-NEXT:    movl (%rsp,%rdi,8), %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movq %rbp, %rsp
+; KNL-NEXT:    popq %rbp
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_varible_v8i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi39:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi40:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi41:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-64, %rsp
+; SKX-NEXT:    subq $128, %rsp
+; SKX-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; SKX-NEXT:    vpcmpnleud %ymm1, %ymm0, %k0
+; SKX-NEXT:    vpmovm2q %k0, %zmm0
+; SKX-NEXT:    vmovdqa64 %zmm0, (%rsp)
+; SKX-NEXT:    andl $7, %edi
+; SKX-NEXT:    movl (%rsp,%rdi,8), %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+  %t1 = icmp ugt <8 x i32> %a, %b
+  %t2 = extractelement <8 x i1> %t1, i32 %index
+  %res = zext i1 %t2 to i8
+  ret i8 %res
+}
+
+define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) {
+; KNL-LABEL: test_extractelement_varible_v16i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    pushq %rbp
+; KNL-NEXT:  Lcfi45:
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:  Lcfi46:
+; KNL-NEXT:    .cfi_offset %rbp, -16
+; KNL-NEXT:    movq %rsp, %rbp
+; KNL-NEXT:  Lcfi47:
+; KNL-NEXT:    .cfi_def_cfa_register %rbp
+; KNL-NEXT:    andq $-64, %rsp
+; KNL-NEXT:    subq $128, %rsp
+; KNL-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vmovdqa32 %zmm0, (%rsp)
+; KNL-NEXT:    andl $15, %edi
+; KNL-NEXT:    movl (%rsp,%rdi,4), %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movq %rbp, %rsp
+; KNL-NEXT:    popq %rbp
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_varible_v16i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi42:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi43:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi44:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-64, %rsp
+; SKX-NEXT:    subq $128, %rsp
+; SKX-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
+; SKX-NEXT:    vpmovm2d %k0, %zmm0
+; SKX-NEXT:    vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT:    andl $15, %edi
+; SKX-NEXT:    movl (%rsp,%rdi,4), %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+  %t1 = icmp ugt <16 x i32> %a, %b
+  %t2 = extractelement <16 x i1> %t1, i32 %index
+  %res = zext i1 %t2 to i8
+  ret i8 %res
+}
+
+define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b, i32 %index) {
+; KNL-LABEL: test_extractelement_varible_v32i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    pushq %rbp
+; KNL-NEXT:  Lcfi48:
+; KNL-NEXT:    .cfi_def_cfa_offset 16
+; KNL-NEXT:  Lcfi49:
+; KNL-NEXT:    .cfi_offset %rbp, -16
+; KNL-NEXT:    movq %rsp, %rbp
+; KNL-NEXT:  Lcfi50:
+; KNL-NEXT:    .cfi_def_cfa_register %rbp
+; KNL-NEXT:    andq $-32, %rsp
+; KNL-NEXT:    subq $64, %rsp
+; KNL-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; KNL-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa %ymm0, (%rsp)
+; KNL-NEXT:    andl $31, %edi
+; KNL-NEXT:    movq %rsp, %rax
+; KNL-NEXT:    movb (%rdi,%rax), %al
+; KNL-NEXT:    andb $1, %al
+; KNL-NEXT:    movzbl %al, %eax
+; KNL-NEXT:    movq %rbp, %rsp
+; KNL-NEXT:    popq %rbp
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_extractelement_varible_v32i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi45:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi46:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi47:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-64, %rsp
+; SKX-NEXT:    subq $128, %rsp
+; SKX-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; SKX-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0
+; SKX-NEXT:    vpmovm2w %k0, %zmm0
+; SKX-NEXT:    vmovdqu16 %zmm0, (%rsp)
+; SKX-NEXT:    andl $31, %edi
+; SKX-NEXT:    movzwl (%rsp,%rdi,2), %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+  %t1 = icmp ugt <32 x i8> %a, %b
+  %t2 = extractelement <32 x i1> %t1, i32 %index
+  %res = zext i1 %t2 to i8
+  ret i8 %res
+}
+

Added: llvm/trunk/test/CodeGen/X86/avx512-insert-extract_i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract_i1.ll?rev=295718&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract_i1.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract_i1.ll Tue Feb 21 08:01:25 2017
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=SKX_ONLY %s
+
+; TODO - fix fail on KNL and move this test to avx512-insert-extract.ll
+
+define zeroext i8 @test_extractelement_varible_v64i1(<64 x i8> %a, <64 x i8> %b, i32 %index) {
+; SKX-LABEL: test_extractelement_varible_v64i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    pushq %rbp
+; SKX-NEXT:  Lcfi0:
+; SKX-NEXT:    .cfi_def_cfa_offset 16
+; SKX-NEXT:  Lcfi1:
+; SKX-NEXT:    .cfi_offset %rbp, -16
+; SKX-NEXT:    movq %rsp, %rbp
+; SKX-NEXT:  Lcfi2:
+; SKX-NEXT:    .cfi_def_cfa_register %rbp
+; SKX-NEXT:    andq $-64, %rsp
+; SKX-NEXT:    subq $128, %rsp
+; SKX-NEXT:    ## kill: %EDI<def> %EDI<kill> %RDI<def>
+; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
+; SKX-NEXT:    vpmovm2b %k0, %zmm0
+; SKX-NEXT:    vmovdqu8 %zmm0, (%rsp)
+; SKX-NEXT:    andl $63, %edi
+; SKX-NEXT:    movq %rsp, %rax
+; SKX-NEXT:    movb (%rdi,%rax), %al
+; SKX-NEXT:    andb $1, %al
+; SKX-NEXT:    movzbl %al, %eax
+; SKX-NEXT:    movq %rbp, %rsp
+; SKX-NEXT:    popq %rbp
+; SKX-NEXT:    retq
+  %t1 = icmp ugt <64 x i8> %a, %b
+  %t2 = extractelement <64 x i1> %t1, i32 %index
+  %res = zext i1 %t2 to i8
+  ret i8 %res
+}
+




More information about the llvm-commits mailing list