[llvm] ed1b0da - [X86] combineConcatVectorOps - fold v4i64/v8x32 concat(broadcast(),broadcast()) -> permilps(concat())
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 25 07:40:14 PDT 2022
Author: Simon Pilgrim
Date: 2022-10-25T15:37:42+01:00
New Revision: ed1b0da5570a9aaf02b11c0aa691ab9f99ea4c34
URL: https://github.com/llvm/llvm-project/commit/ed1b0da5570a9aaf02b11c0aa691ab9f99ea4c34
DIFF: https://github.com/llvm/llvm-project/commit/ed1b0da5570a9aaf02b11c0aa691ab9f99ea4c34.diff
LOG: [X86] combineConcatVectorOps - fold v4i64/v8x32 concat(broadcast(),broadcast()) -> permilps(concat())
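Extend the existing v4f64 fold to handle v4i64/v8f32/v8i32 as well. The v4f64/v4i64 cases now emit UNPCKL of the concatenated broadcast sources (replacing the previous MOVDDUP), v8f32 emits VPERMILPI, and v8i32 emits PSHUFD (only on targets with 256-bit integer support; AVX1-only handling is left as a TODO).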
Fixes #58585
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c908ec6fb48f..de1c8f03b861 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54460,11 +54460,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
unsigned NumOps = Ops.size();
switch (Op0.getOpcode()) {
case X86ISD::VBROADCAST: {
- if (!IsSplat && VT == MVT::v4f64 && llvm::all_of(Ops, [](SDValue Op) {
+ if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
return Op.getOperand(0).getValueType().is128BitVector();
- }))
- return DAG.getNode(X86ISD::MOVDDUP, DL, VT,
- ConcatSubOperand(VT, Ops, 0));
+ })) {
+ if (VT == MVT::v4f64 || VT == MVT::v4i64)
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT,
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 0));
+ // TODO: Add pseudo v8i32 PSHUFD handling to AVX1Only targets.
+ if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
+ return DAG.getNode(VT == MVT::v8f32 ? X86ISD::VPERMILPI
+ : X86ISD::PSHUFD,
+ DL, VT, ConcatSubOperand(VT, Ops, 0),
+ getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG));
+ }
break;
}
case X86ISD::MOVDDUP:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 5f5b799a4c6c..d8f49797ba0f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1550,16 +1550,16 @@ define <4 x i64> @shuffle_v4i64_0044_v2i64(<2 x i64> %a, <2 x i64> %b) {
;
; AVX2-LABEL: shuffle_v4i64_0044_v2i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0044_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX512VL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX512VL-NEXT: retq
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index 6f843ab0e847..8c196a377da6 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -639,19 +639,12 @@ define <8 x float> @shuffle_v8f32_00224466_v4f32(<4 x float> %a, <4 x float> %b)
}
define <8 x float> @shuffle_v8f32_00004444_v4f32(<4 x float> %a, <4 x float> %b) {
-; AVX1-LABEL: shuffle_v8f32_00004444_v4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8f32_00004444_v4f32:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_00004444_v4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; ALL-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x float> %1
}
@@ -3289,19 +3282,12 @@ define <8 x i32> @shuffle_v8i32_32107654_v4i32(<4 x i32> %a, <4 x i32> %b) {
}
define <8 x i32> @shuffle_v8i32_00004444_v4f32(<4 x i32> %a, <4 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_00004444_v4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00004444_v4f32:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_00004444_v4f32:
+; ALL: # %bb.0:
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; ALL-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i32> %1
}
More information about the llvm-commits mailing list