[llvm] 7593f9b - [X86] combineConcatVectorOps - add handling for X86ISD::SHUF128 nodes.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 8 10:14:09 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-08T18:13:43+01:00
New Revision: 7593f9b59a16d395c4bcfab8347037cefdf45f71
URL: https://github.com/llvm/llvm-project/commit/7593f9b59a16d395c4bcfab8347037cefdf45f71
DIFF: https://github.com/llvm/llvm-project/commit/7593f9b59a16d395c4bcfab8347037cefdf45f71.diff
LOG: [X86] combineConcatVectorOps - add handling for X86ISD::SHUF128 nodes.
Prevents regressions in some upcoming work to improve codegen for concat_vectors(extract_subvector(), extract_subvector()) patterns.
X86ISD::SHUF128 optimization is still fairly limited (especially for the zmm variant): we don't yet simplify the shuffle based on demanded elements as we do for SHUFPS.
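To sanity check the immediate folding in the new SHUF128 case, here is a small standalone sketch (not part of the patch) that models each 128-bit lane as a single int and confirms that concatenating the two 256-bit SHUF128 results matches one 512-bit SHUF128 using the folded immediate. The shuf128_256/shuf128_512 helpers are hypothetical and assume the documented vshufi64x2 lane selection: for ymm, imm bits 0 and 1 each pick a 128-bit lane from the first and second source; for zmm, each 2-bit field of the 8-bit imm picks one of four lanes, the low two fields from the first source and the high two from the second.

#include <array>
#include <cassert>
#include <cstdio>

using Ymm = std::array<int, 2>; // two 128-bit lanes
using Zmm = std::array<int, 4>; // four 128-bit lanes

// 256-bit SHUF128: lane 0 from A, lane 1 from B, each picked by one imm bit.
static Ymm shuf128_256(Ymm A, Ymm B, unsigned Imm) {
  return {A[Imm & 1], B[(Imm >> 1) & 1]};
}

// 512-bit SHUF128: lanes 0-1 from A, lanes 2-3 from B, 2 imm bits per lane.
static Zmm shuf128_512(Zmm A, Zmm B, unsigned Imm) {
  return {A[(Imm >> 0) & 3], A[(Imm >> 2) & 3],
          B[(Imm >> 4) & 3], B[(Imm >> 6) & 3]};
}

static Zmm concat(Ymm Lo, Ymm Hi) { return {Lo[0], Lo[1], Hi[0], Hi[1]}; }

int main() {
  Ymm A0{0, 1}, B0{2, 3}, A1{4, 5}, B1{6, 7};
  for (unsigned Imm0 = 0; Imm0 != 4; ++Imm0) {
    for (unsigned Imm1 = 0; Imm1 != 4; ++Imm1) {
      // Immediate folding as written in the combineConcatVectorOps patch.
      unsigned Imm = ((Imm0 & 1) << 0) | ((Imm0 & 2) << 1) | 0x08 |
                     ((Imm1 & 1) << 4) | ((Imm1 & 2) << 5) | 0x80;
      Zmm Expected = concat(shuf128_256(A0, B0, Imm0),
                            shuf128_256(A1, B1, Imm1));
      Zmm Actual = shuf128_512(concat(A0, B0), concat(A1, B1), Imm);
      assert(Expected == Actual);
    }
  }
  std::puts("all 16 immediate pairs match");
}

The 0x08 and 0x80 bits force result lanes 1 and 3 to come from the upper half of each concatenated pair, i.e. from the original second operands of the two 256-bit shuffles.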
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-4.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 93c3beb72e5096..c0a87ec3a00dbc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54590,6 +54590,21 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
break;
+ case X86ISD::SHUF128: {
+ if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
+ unsigned Imm0 = Ops[0].getConstantOperandVal(2);
+ unsigned Imm1 = Ops[1].getConstantOperandVal(2);
+ unsigned Imm = ((Imm0 & 1) << 0) | ((Imm0 & 2) << 1) | 0x08 |
+ ((Imm1 & 1) << 4) | ((Imm1 & 2) << 5) | 0x80;
+ SDValue LHS = concatSubVectors(Ops[0].getOperand(0),
+ Ops[0].getOperand(1), DAG, DL);
+ SDValue RHS = concatSubVectors(Ops[1].getOperand(0),
+ Ops[1].getOperand(1), DAG, DL);
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, LHS, RHS,
+ DAG.getTargetConstant(Imm, DL, MVT::i8));
+ }
+ break;
+ }
case ISD::TRUNCATE:
if (!IsSplat && NumOps == 2 && VT.is256BitVector()) {
EVT SrcVT = Ops[0].getOperand(0).getValueType();
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-4.ll
index 884bc068daf1ba..65028fbbb13f1c 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-4.ll
@@ -400,10 +400,10 @@ define void @store_i8_stride4_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15]
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm2
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm5
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm3[2,3],ymm1[2,3]
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm4[2,3],ymm0[2,3]
; AVX512BW-NEXT: vinserti64x4 $1, %ymm5, %zmm2, %zmm2
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm4, %zmm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[2,3,6,7],zmm0[2,3,6,7]
; AVX512BW-NEXT: vmovdqa64 %zmm0, 64(%r8)
; AVX512BW-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512BW-NEXT: vzeroupper
@@ -690,22 +690,22 @@ define void @store_i8_stride4_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm2[4],zmm0[5],zmm2[5],zmm0[6],zmm2[6],zmm0[7],zmm2[7],zmm0[12],zmm2[12],zmm0[13],zmm2[13],zmm0[14],zmm2[14],zmm0[15],zmm2[15],zmm0[20],zmm2[20],zmm0[21],zmm2[21],zmm0[22],zmm2[22],zmm0[23],zmm2[23],zmm0[28],zmm2[28],zmm0[29],zmm2[29],zmm0[30],zmm2[30],zmm0[31],zmm2[31]
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm2
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm5
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm3[2,3],ymm1[2,3]
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm7 = ymm4[2,3],ymm0[2,3]
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm3, %ymm3
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm8
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm4, %ymm4
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm9
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm3[2,3],ymm1[2,3]
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm4[2,3],ymm0[2,3]
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm6
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm3, %ymm7
+; AVX512BW-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm8
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm4, %ymm9
+; AVX512BW-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8
; AVX512BW-NEXT: vinserti64x4 $1, %ymm5, %zmm2, %zmm2
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm7, %zmm6, %zmm3
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm9, %zmm8, %zmm4
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512BW-NEXT: vmovdqa64 %zmm0, 192(%r8)
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm4, %zmm4
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm3
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[2,3,6,7],zmm4[2,3,6,7]
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm8, %zmm6, %zmm4
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm9[0,1,2,3],zmm0[4,5,6,7]
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm7[0,1,2,3],zmm1[4,5,6,7]
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[2,3,6,7],zmm0[2,3,6,7]
; AVX512BW-NEXT: vmovdqa64 %zmm3, 64(%r8)
+; AVX512BW-NEXT: vmovdqa64 %zmm0, 192(%r8)
; AVX512BW-NEXT: vmovdqa64 %zmm4, 128(%r8)
; AVX512BW-NEXT: vmovdqa64 %zmm2, (%r8)
; AVX512BW-NEXT: vzeroupper