[llvm] 4ed452b - [X86] getFauxShuffleMask - handle insert_subvector(src, bitcast(extract_subvector(sub))) patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 10 05:38:52 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-10T13:38:38+01:00
New Revision: 4ed452b74778430664ba3e1a59ba2eaf441423e6
URL: https://github.com/llvm/llvm-project/commit/4ed452b74778430664ba3e1a59ba2eaf441423e6
DIFF: https://github.com/llvm/llvm-project/commit/4ed452b74778430664ba3e1a59ba2eaf441423e6.diff
LOG: [X86] getFauxShuffleMask - handle insert_subvector(src, bitcast(extract_subvector(sub))) patterns
Add bitcast handling to the existing insert_subvector(src, extract_subvector(sub)) pattern, and recognise undef src cases to allow us to detect vector widening patterns.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-trunc-packus.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2ec5b2930a0d63..6cc0b73314606f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5646,17 +5646,28 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
unsigned NumSubElts = SubVT.getVectorNumElements();
if (!N->isOnlyUserOf(Sub.getNode()))
return false;
+ SDValue SubBC = peekThroughBitcasts(Sub);
uint64_t InsertIdx = N.getConstantOperandVal(2);
// Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
- if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- Sub.getOperand(0).getValueType() == VT) {
- uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
- for (int i = 0; i != (int)NumElts; ++i)
- Mask.push_back(i);
+ if (SubBC.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ SubBC.getOperand(0).getValueSizeInBits() == NumSizeInBits) {
+ uint64_t ExtractIdx = SubBC.getConstantOperandVal(1);
+ SDValue SubBCSrc = SubBC.getOperand(0);
+ unsigned NumSubSrcBCElts = SubBCSrc.getValueType().getVectorNumElements();
+ unsigned MaxElts = std::max(NumElts, NumSubSrcBCElts);
+ assert((MaxElts % NumElts) == 0 && (MaxElts % NumSubSrcBCElts) == 0 &&
+ "Subvector valuetype mismatch");
+ InsertIdx *= (MaxElts / NumElts);
+ ExtractIdx *= (MaxElts / NumSubSrcBCElts);
+ NumSubElts *= (MaxElts / NumElts);
+ bool SrcIsUndef = Src.isUndef();
+ for (int i = 0; i != (int)MaxElts; ++i)
+ Mask.push_back(SrcIsUndef ? SM_SentinelUndef : i);
for (int i = 0; i != (int)NumSubElts; ++i)
- Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
- Ops.push_back(Src);
- Ops.push_back(Sub.getOperand(0));
+ Mask[InsertIdx + i] = (SrcIsUndef ? 0 : MaxElts) + ExtractIdx + i;
+ if (!SrcIsUndef)
+ Ops.push_back(Src);
+ Ops.push_back(SubBCSrc);
return true;
}
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
index 52e533b5154806..8032c8df2bda84 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
@@ -873,9 +873,8 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) {
; SSE41-NEXT: pand %xmm5, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
-; SSE41-NEXT: packusdw %xmm1, %xmm1
-; SSE41-NEXT: packusdw %xmm1, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_packus_v2i64_v2i16:
@@ -887,21 +886,32 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) {
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: trunc_packus_v2i64_v2i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
+; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX2-SLOW-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
+; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX2-FAST-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_packus_v2i64_v2i16:
; AVX512F: # %bb.0:
More information about the llvm-commits mailing list