[llvm] a6a258f - [X86][AVX] Fold concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 21 06:51:04 PST 2021
Author: Simon Pilgrim
Date: 2021-02-21T14:50:43Z
New Revision: a6a258f1da37a678867bd29f7125417944bdecb2
URL: https://github.com/llvm/llvm-project/commit/a6a258f1da37a678867bd29f7125417944bdecb2
DIFF: https://github.com/llvm/llvm-project/commit/a6a258f1da37a678867bd29f7125417944bdecb2.diff
LOG: [X86][AVX] Fold concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128
Fixes regression exposed by removing bitcasts across logic-ops in D96206.
Differential Revision: https://reviews.llvm.org/D96206
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr40891.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 617b6ba250cd..3ae706e5a139 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49249,6 +49249,29 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
+ // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.
+ // Only concat of subvector high halves which vperm2x128 is best at.
+ // TODO: This should go in combineX86ShufflesRecursively eventually.
+ if (VT.is256BitVector() && Ops.size() == 2) {
+ SDValue Src0 = peekThroughBitcasts(Ops[0]);
+ SDValue Src1 = peekThroughBitcasts(Ops[1]);
+ if (Src0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Src1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ EVT SrcVT0 = Src0.getOperand(0).getValueType();
+ EVT SrcVT1 = Src1.getOperand(0).getValueType();
+ unsigned NumSrcElts0 = SrcVT0.getVectorNumElements();
+ unsigned NumSrcElts1 = SrcVT1.getVectorNumElements();
+ if (SrcVT0.is256BitVector() && SrcVT1.is256BitVector() &&
+ Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2) &&
+ Src1.getConstantOperandAPInt(1) == (NumSrcElts1 / 2)) {
+ return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
+ DAG.getBitcast(VT, Src0.getOperand(0)),
+ DAG.getBitcast(VT, Src1.getOperand(0)),
+ DAG.getTargetConstant(0x31, DL, MVT::i8));
+ }
+ }
+ }
+
// Repeated opcode.
// TODO - combineX86ShufflesRecursively should handle shuffle concatenation
// but it currently struggles with different vector widths.
diff --git a/llvm/test/CodeGen/X86/pr40891.ll b/llvm/test/CodeGen/X86/pr40891.ll
index c935d8592a4d..d67739767b21 100644
--- a/llvm/test/CodeGen/X86/pr40891.ll
+++ b/llvm/test/CodeGen/X86/pr40891.ll
@@ -8,11 +8,9 @@ define <8 x i32> @foo(<8 x i64> %x, <4 x i64> %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vandps {{\.LCPI.*}}, %ymm1, %ymm1
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm2[0,2],ymm0[0,2],ymm2[4,6],ymm0[4,6]
+; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; CHECK-NEXT: retl
%a = shufflevector <4 x i64> %y, <4 x i64> <i64 12345, i64 67890, i64 13579, i64 24680>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%b = and <8 x i64> %x, %a
More information about the llvm-commits mailing list