[llvm] f1b76c5 - [X86] combineConcatVectorOps - IsConcatFree - peek through bitcasts to find inplace subvectors.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 07:24:09 PDT 2024


Author: Simon Pilgrim
Date: 2024-07-23T15:22:44+01:00
New Revision: f1b76c53b6312a10d2ee4219e980498e7cbe5d53

URL: https://github.com/llvm/llvm-project/commit/f1b76c53b6312a10d2ee4219e980498e7cbe5d53
DIFF: https://github.com/llvm/llvm-project/commit/f1b76c53b6312a10d2ee4219e980498e7cbe5d53.diff

LOG: [X86] combineConcatVectorOps - IsConcatFree - peek through bitcasts to find inplace subvectors.

The EXTRACT_SUBVECTOR nodes don't have to be the same type; they just need to be at the correct bit offsets when concatenated back together.

This reapplies d43ec97de081755990264049eba09cb7c83cb321 (which was reverted in 68cb903594cd03dd708ef70c85c10807a6deefb5) now that 65e86a8f3fb44dc09dc2e08526d69e3a57f63995 has landed to address a downstream issue.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vselect-avx.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5a9d679d7002c..1c1f9e68facc0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56183,18 +56183,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     };
     auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
       bool AllConstants = true;
-      bool AllSubVectors = true;
+      bool AllSubs = true;
+      unsigned VecSize = VT.getSizeInBits();
       for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
-        SDValue Sub = SubOps[I].getOperand(Op);
-        unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
-        SDValue BC = peekThroughBitcasts(Sub);
+        SDValue BC = peekThroughBitcasts(SubOps[I].getOperand(Op));
+        unsigned SubSize = BC.getValueSizeInBits();
+        unsigned EltSize = BC.getScalarValueSizeInBits();
         AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
                         ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
-        AllSubVectors &= Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
-                         Sub.getOperand(0).getValueType() == VT &&
-                         Sub.getConstantOperandAPInt(1) == (I * NumSubElts);
+        AllSubs &= BC.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+                   BC.getOperand(0).getValueSizeInBits() == VecSize &&
+                   (BC.getConstantOperandVal(1) * EltSize) == (I * SubSize);
       }
-      return AllConstants || AllSubVectors;
+      return AllConstants || AllSubs;
     };
 
     switch (Op0.getOpcode()) {

diff  --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index bd26948766a56..364390a4a60e5 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -259,7 +259,7 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
   ret void
 }
 
-; TODO: Concatenate 128-bit pblendvb back together on AVX2+ targets (hidden by SSE __m128i bitcasts)
+; Concatenate 128-bit pblendvb back together on AVX2+ targets (hidden by SSE __m128i bitcasts)
 define <4 x i64> @vselect_concat_split_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64>  %c, <4 x i64> %d) {
 ; AVX1-LABEL: vselect_concat_split_v16i8:
 ; AVX1:       ## %bb.0:
@@ -277,24 +277,13 @@ define <4 x i64> @vselect_concat_split_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i6
 ; AVX2-LABEL: vselect_concat_split_v16i8:
 ; AVX2:       ## %bb.0:
 ; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm3, %ymm2
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm3
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
-; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm2
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm3, %ymm0
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: vselect_concat_split_v16i8:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vpcmpgtb %ymm2, %ymm3, %ymm2
-; AVX512-NEXT:    vextracti128 $1, %ymm2, %xmm3
-; AVX512-NEXT:    vextracti128 $1, %ymm1, %xmm4
-; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 killed $ymm1 def $ymm1
-; AVX512-NEXT:    vpternlogq $226, %xmm0, %xmm2, %xmm1
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT:    vpternlogq $226, %xmm0, %xmm3, %xmm4
-; AVX512-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
+; AVX512-NEXT:    vpternlogq $216, %ymm2, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
   %a.bc = bitcast <4 x i64> %a to <32 x i8>
   %b.bc = bitcast <4 x i64> %b to <32 x i8>


        


More information about the llvm-commits mailing list