[llvm] 8cbd819 - [X86] Improving folding of concat_vectors of subvectors from the same broadcast
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 1 03:31:23 PDT 2020
Author: Simon Pilgrim
Date: 2020-05-01T11:23:10+01:00
New Revision: 8cbd8194c1aac564879df36931109a5b9f2c1388
URL: https://github.com/llvm/llvm-project/commit/8cbd8194c1aac564879df36931109a5b9f2c1388
DIFF: https://github.com/llvm/llvm-project/commit/8cbd8194c1aac564879df36931109a5b9f2c1388.diff
LOG: [X86] Improving folding of concat_vectors of subvectors from the same broadcast
Handle concat_vectors(extract_subvector(broadcast(x)), extract_subvector(broadcast(x))) -> broadcast(x).
To expose this, collectConcatOps also needs to recognise the insert_subvector(x, extract_subvector(x, lo), hi) subvector splat pattern.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-bitselect.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 86d756dbd189..6e52f4cd6d47 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5728,13 +5728,21 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
// TODO - Handle more general insert_subvector chains.
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
- Idx == (VT.getVectorNumElements() / 2) &&
- Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
- Src.getOperand(1).getValueType() == SubVT &&
- isNullConstant(Src.getOperand(2))) {
- Ops.push_back(Src.getOperand(1));
- Ops.push_back(Sub);
- return true;
+ Idx == (VT.getVectorNumElements() / 2)) {
+ // insert_subvector(insert_subvector(undef, x, lo), y, hi)
+ if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Src.getOperand(1).getValueType() == SubVT &&
+ isNullConstant(Src.getOperand(2))) {
+ Ops.push_back(Src.getOperand(1));
+ Ops.push_back(Sub);
+ return true;
+ }
+ // insert_subvector(x, extract_subvector(x, lo), hi)
+ if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
+ Ops.append(2, Sub);
+ return true;
+ }
}
}
@@ -46679,6 +46687,15 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
(EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
Op0.getOperand(0).getValueType() == VT.getScalarType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
+
+ // concat_vectors(extract_subvector(broadcast(x)),
+ // extract_subvector(broadcast(x))) -> broadcast(x)
+ if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Op0.getOperand(0).getValueType() == VT) {
+ if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
+ Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
+ return Op0.getOperand(0);
+ }
}
// Repeated opcode.
diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll
index a2d55ea8cf0b..37128e312132 100644
--- a/llvm/test/CodeGen/X86/combine-bitselect.ll
+++ b/llvm/test/CodeGen/X86/combine-bitselect.ll
@@ -590,28 +590,16 @@ define <4 x i64> @bitselect_v4i64_broadcast_rrm(<4 x i64> %a0, <4 x i64> %a1, i6
; XOP-LABEL: bitselect_v4i64_broadcast_rrm:
; XOP: # %bb.0:
; XOP-NEXT: vbroadcastsd (%rdi), %ymm2
-; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm3
-; XOP-NEXT: vandps %ymm2, %ymm0, %ymm0
-; XOP-NEXT: vandnps %ymm1, %ymm3, %ymm1
-; XOP-NEXT: vorps %ymm1, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT: retq
;
-; AVX1-LABEL: bitselect_v4i64_broadcast_rrm:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm3
-; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: bitselect_v4i64_broadcast_rrm:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm2
-; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vandnps %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: bitselect_v4i64_broadcast_rrm:
+; AVX: # %bb.0:
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm2
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandnps %ymm1, %ymm2, %ymm1
+; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX512F: # %bb.0:
@@ -986,37 +974,20 @@ define <8 x i64> @bitselect_v8i64_broadcast_rrm(<8 x i64> %a0, <8 x i64> %a1, i6
; XOP-LABEL: bitselect_v8i64_broadcast_rrm:
; XOP: # %bb.0:
; XOP-NEXT: vbroadcastsd (%rdi), %ymm4
-; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm5
-; XOP-NEXT: vandps %ymm4, %ymm1, %ymm1
-; XOP-NEXT: vandps %ymm4, %ymm0, %ymm0
-; XOP-NEXT: vandnps %ymm3, %ymm5, %ymm3
-; XOP-NEXT: vorps %ymm3, %ymm1, %ymm1
-; XOP-NEXT: vandnps %ymm2, %ymm5, %ymm2
-; XOP-NEXT: vorps %ymm2, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm4, %ymm2, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm4, %ymm3, %ymm1, %ymm1
; XOP-NEXT: retq
;
-; AVX1-LABEL: bitselect_v8i64_broadcast_rrm:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm5
-; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1
-; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm3, %ymm5, %ymm3
-; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vandnps %ymm2, %ymm5, %ymm2
-; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: bitselect_v8i64_broadcast_rrm:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm4
-; AVX2-NEXT: vandps %ymm4, %ymm1, %ymm1
-; AVX2-NEXT: vandps %ymm4, %ymm0, %ymm0
-; AVX2-NEXT: vandnps %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vorps %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vandnps %ymm2, %ymm4, %ymm2
-; AVX2-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: bitselect_v8i64_broadcast_rrm:
+; AVX: # %bb.0:
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm4
+; AVX-NEXT: vandps %ymm4, %ymm1, %ymm1
+; AVX-NEXT: vandps %ymm4, %ymm0, %ymm0
+; AVX-NEXT: vandnps %ymm3, %ymm4, %ymm3
+; AVX-NEXT: vorps %ymm3, %ymm1, %ymm1
+; AVX-NEXT: vandnps %ymm2, %ymm4, %ymm2
+; AVX-NEXT: vorps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: bitselect_v8i64_broadcast_rrm:
; AVX512: # %bb.0:
More information about the llvm-commits mailing list