[llvm] r321369 - [X86] When lowering insert_vector_elt/extract_vector_elt of vXi1 with a non-constant index just use either a 128-bit type or the vXi8 type with the correct number of elements.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 22 09:18:11 PST 2017
Author: ctopper
Date: Fri Dec 22 09:18:11 2017
New Revision: 321369
URL: http://llvm.org/viewvc/llvm-project?rev=321369&view=rev
Log:
[X86] When lowering insert_vector_elt/extract_vector_elt of vXi1 with a non-constant index just use either a 128-bit type or the vXi8 type with the correct number of elements.
Despite what the comment said, there isn't better codegen for 512-bit vectors. The 128/256/512-bit implementation just stores to memory and loads an element. There's no advantage to doing that with a larger size. In fact, in many cases it causes a stack realignment and generates worse code.
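For reference, here is a minimal standalone sketch (not LLVM code; the helper name extendedEltBits is made up for illustration) of the element-width selection the patch switches to: with at most 8 mask bits the extended element is 128/NumElts bits wide, so the extended vector stays 128-bit, and with more mask bits each element becomes an i8.

#include <cstdio>

// Bit width of each extended element for a vXi1 mask with NumElts
// elements (NumElts assumed to be a power of two between 2 and 64).
static unsigned extendedEltBits(unsigned NumElts) {
  return NumElts <= 8 ? 128 / NumElts : 8;
}

int main() {
  for (unsigned NumElts : {2u, 4u, 8u, 16u, 32u, 64u}) {
    unsigned EltBits = extendedEltBits(NumElts);
    std::printf("v%ui1 -> v%ui%u (%u-bit vector)\n", NumElts, NumElts,
                EltBits, NumElts * EltBits);
  }
  return 0;
}

This prints, e.g., v8i1 -> v8i16 (128-bit vector) and v32i1 -> v32i8 (256-bit vector); the old code widened everything above 4 elements to 512 bits, which is why the test diffs below drop the 64-byte stack realignment (andq $-64, %rsp).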
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=321369&r1=321368&r2=321369&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 22 09:18:11 2017
@@ -14578,11 +14578,10 @@ static SDValue ExtractBitFromMaskVector(
unsigned NumElts = VecVT.getVectorNumElements();
// Extending v8i1/v16i1 to 512-bit get better performance on KNL
// than extending to 128/256bit.
- unsigned VecSize = (NumElts <= 4 ? 128 : 512);
- MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize / NumElts), NumElts);
- SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- ExtVT.getVectorElementType(), Ext, Idx);
+ MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
+ MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
@@ -14777,9 +14776,8 @@ static SDValue InsertBitToMaskVector(SDV
// Non constant index. Extend source and destination,
// insert element and then truncate the result.
unsigned NumElts = VecVT.getVectorNumElements();
- unsigned VecSize = (NumElts <= 4 ? 128 : 512);
- MVT ExtVecVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
- MVT ExtEltVT = ExtVecVT.getVectorElementType();
+ MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
+ MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=321369&r1=321368&r2=321369&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Fri Dec 22 09:18:11 2017
@@ -1616,45 +1616,28 @@ define zeroext i8 @test_extractelement_v
define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v8i1:
; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: andq $-64, %rsp
-; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $7, %edi
-; KNL-NEXT: movzbl (%rsp,%rdi,8), %eax
+; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movq %rbp, %rsp
-; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v8i1:
; SKX: ## %bb.0:
-; SKX-NEXT: pushq %rbp
-; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: .cfi_offset %rbp, -16
-; SKX-NEXT: movq %rsp, %rbp
-; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleud %ymm1, %ymm0, %k0
-; SKX-NEXT: vpmovm2q %k0, %zmm0
-; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $7, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,8), %eax
+; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: movq %rbp, %rsp
-; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%t1 = icmp ugt <8 x i32> %a, %b
@@ -1666,43 +1649,28 @@ define zeroext i8 @test_extractelement_v
define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v16i1:
; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: andq $-64, %rsp
-; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa32 %zmm0, (%rsp)
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $15, %edi
-; KNL-NEXT: movzbl (%rsp,%rdi,4), %eax
+; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
+; KNL-NEXT: movzbl (%rdi,%rax), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movq %rbp, %rsp
-; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v16i1:
; SKX: ## %bb.0:
-; SKX-NEXT: pushq %rbp
-; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: .cfi_offset %rbp, -16
-; SKX-NEXT: movq %rsp, %rbp
-; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
-; SKX-NEXT: vpmovm2d %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $15, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,4), %eax
+; SKX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
+; SKX-NEXT: movzbl (%rdi,%rax), %eax
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: movq %rbp, %rsp
-; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%t1 = icmp ugt <16 x i32> %a, %b
@@ -1743,14 +1711,15 @@ define zeroext i8 @test_extractelement_v
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
+; SKX-NEXT: andq $-32, %rsp
+; SKX-NEXT: subq $64, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
-; SKX-NEXT: vpmovm2w %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vmovdqa %ymm0, (%rsp)
; SKX-NEXT: andl $31, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,2), %eax
+; SKX-NEXT: movq %rsp, %rax
+; SKX-NEXT: movzbl (%rdi,%rax), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
@@ -1816,20 +1785,19 @@ define i32 @test_insertelement_variable_
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
+; SKX-NEXT: andq $-32, %rsp
+; SKX-NEXT: subq $64, %rsp
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
-; SKX-NEXT: xorl %eax, %eax
-; SKX-NEXT: testb %dil, %dil
-; SKX-NEXT: setne %al
-; SKX-NEXT: vpmovm2w %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
; SKX-NEXT: andl $31, %esi
-; SKX-NEXT: movw %ax, (%rsp,%rsi,2)
-; SKX-NEXT: vpsllw $15, (%rsp), %zmm0
-; SKX-NEXT: vpmovw2m %zmm0, %k0
+; SKX-NEXT: testb %dil, %dil
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vmovdqa %ymm0, (%rsp)
+; SKX-NEXT: movq %rsp, %rax
+; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: vpsllw $7, (%rsp), %ymm0
+; SKX-NEXT: vpmovb2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp