[llvm] r363499 - [X86][AVX] Decode constant bits from insert_subvector(c1, c2, c3)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 15 10:05:24 PDT 2019
Author: rksimon
Date: Sat Jun 15 10:05:24 2019
New Revision: 363499
URL: http://llvm.org/viewvc/llvm-project?rev=363499&view=rev
Log:
[X86][AVX] Decode constant bits from insert_subvector(c1, c2, c3)
This mostly happens when SimplifyDemandedVectorElts reduces a vector to insert_subvector(undef, c1, 0).
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=363499&r1=363498&r2=363499&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jun 15 10:05:24 2019
@@ -5967,6 +5967,29 @@ static bool getTargetConstantBitsFromNod
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ // Insert constant bits from a base and sub vector sources.
+ if (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(2))) {
+ // TODO - support insert_subvector through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ APInt UndefSubElts;
+ SmallVector<APInt, 32> EltSubBits;
+ if (getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+ UndefSubElts, EltSubBits,
+ AllowWholeUndefs, AllowPartialUndefs) &&
+ getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts, EltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ unsigned BaseIdx = Op.getConstantOperandVal(2);
+ UndefElts.insertBits(UndefSubElts, BaseIdx);
+ for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
+ EltBits[BaseIdx + i] = EltSubBits[i];
+ return true;
+ }
+ }
+
// Extract constant bits from a subvector's source.
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
isa<ConstantSDNode>(Op.getOperand(1))) {
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=363499&r1=363498&r2=363499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Sat Jun 15 10:05:24 2019
@@ -1805,10 +1805,8 @@ define <4 x i32> @test_masked_z_16xi32_t
define <4 x i32> @test_16xi32_to_4xi32_perm_mask9(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask9:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,3]
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [4,1,0,2]
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vpermd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [4,1,12,2]
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; CHECK-NEXT: vpermt2d %ymm0, %ymm2, %ymm1
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll?rev=363499&r1=363498&r2=363499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll Sat Jun 15 10:05:24 2019
@@ -207,23 +207,10 @@ define <8 x float> @combine_vpermilvar_8
ret <8 x float> %1
}
define <8 x float> @demandedelts_vpermilvar_8f32_movsldup(<8 x float> %a0, i32 %a1) {
-; AVX1-LABEL: demandedelts_vpermilvar_8f32_movsldup:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = <u,0,2,2,4,4,6,6>
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],mem[4,5,6,7]
-; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,4,5,6,7]
-; AVX1-NEXT: ret{{[l|q]}}
-;
-; AVX2-LABEL: demandedelts_vpermilvar_8f32_movsldup:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
-; AVX2-NEXT: ret{{[l|q]}}
-;
-; AVX512-LABEL: demandedelts_vpermilvar_8f32_movsldup:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
-; AVX512-NEXT: ret{{[l|q]}}
+; CHECK-LABEL: demandedelts_vpermilvar_8f32_movsldup:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: ret{{[l|q]}}
%1 = insertelement <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>, i32 %a1, i32 0
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
%3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=363499&r1=363498&r2=363499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Sat Jun 15 10:05:24 2019
@@ -933,10 +933,7 @@ define <8 x double> @combine_vpermi2var_
;
; X64-LABEL: combine_vpermi2var_8f64_as_permpd:
; X64: # %bb.0:
-; X64-NEXT: vmovapd {{.*#+}} zmm2 = <u,2,1,3,4,6,5,7>
-; X64-NEXT: vinsertf32x4 $0, {{.*}}(%rip), %zmm2, %zmm2
-; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
-; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm2[2,3,1,1,6,7,5,5]
+; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,2,2,5,7,6,6]
; X64-NEXT: retq
%res0 = insertelement <8 x i64> <i64 0, i64 2, i64 1, i64 3, i64 4, i64 6, i64 5, i64 7>, i64 %a2, i32 0
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %res0, <8 x double> %x1, i8 -1)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=363499&r1=363498&r2=363499&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sat Jun 15 10:05:24 2019
@@ -155,10 +155,7 @@ define <4 x double> @demandedelts_vpermi
;
; X64-LABEL: demandedelts_vpermil2pd256_as_shufpd:
; X64: # %bb.0:
-; X64-NEXT: vmovapd {{.*#+}} xmm2 = <u,4,2,7>
-; X64-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],mem[2,3]
-; X64-NEXT: vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
-; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,3]
+; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm1[0,0],ymm0[3],ymm1[3]
; X64-NEXT: retq
%res0 = insertelement <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i64 %a2, i32 0
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %res0, i8 0)
More information about the llvm-commits
mailing list