[llvm] c1cb733 - [X86] Improve lowering of v16i8->v16i1 truncate under prefer-vector-width=256.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 25 15:20:56 PDT 2020
Author: Craig Topper
Date: 2020-04-25T15:20:33-07:00
New Revision: c1cb733db66ee7c6a11fab0ea89f99c8f55b2286
URL: https://github.com/llvm/llvm-project/commit/c1cb733db66ee7c6a11fab0ea89f99c8f55b2286
DIFF: https://github.com/llvm/llvm-project/commit/c1cb733db66ee7c6a11fab0ea89f99c8f55b2286.diff
LOG: [X86] Improve lowering of v16i8->v16i1 truncate under prefer-vector-width=256.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 44552eb706a7..6aa42fba4eb0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20382,17 +20382,22 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
// trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors
// we need to split into two 8 element vectors which we can extend to v8i32,
// truncate and concat the results. There's an additional complication if
- // the original type is v16i8. In that case we can't split the v16i8 so
- // first we pre-extend it to v16i16 which we can split to v8i16, then extend
- // to v8i32, truncate that to v8i1 and concat the two halves.
+ // the original type is v16i8. In that case we can't split the v16i8
+ // directly, so we need to shuffle high elements to low and use
+ // sign_extend_vector_inreg.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) {
+ SDValue Lo, Hi;
if (InVT == MVT::v16i8) {
- // First we need to sign extend up to 256-bits so we can split that.
- InVT = MVT::v16i16;
- In = DAG.getNode(ISD::SIGN_EXTEND, DL, InVT, In);
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, In);
+ Hi = DAG.getVectorShuffle(
+ InVT, DL, In, In,
+ {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, Hi);
+ } else {
+ assert(InVT == MVT::v16i16 && "Unexpected VT!");
+ Lo = extract128BitVector(In, 0, DAG, DL);
+ Hi = extract128BitVector(In, 8, DAG, DL);
}
- SDValue Lo = extract128BitVector(In, 0, DAG, DL);
- SDValue Hi = extract128BitVector(In, 8, DAG, DL);
// We're split now, just emit two truncates and a concat. The two
// truncates will trigger legalization to come back to this function.
Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo);
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
index bf5ba184fc00..904d0ff0025e 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
@@ -133,14 +133,12 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; AVX256VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX256VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX256VL-NEXT: vpmovsxbw %xmm1, %xmm1
-; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX256VL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k2
-; AVX256VL-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX256VL-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX256VL-NEXT: vptestmd %ymm0, %ymm0, %k3
; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k3} {z}
More information about the llvm-commits mailing list