[llvm] dd4bf22 - [X86] combineBlendOfPermutes - don't introduce lane-crossing permutes without AVX2 support.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed May 8 02:23:08 PDT 2024
Author: Simon Pilgrim
Date: 2024-05-08T10:22:58+01:00
New Revision: dd4bf22b9380e797362fac1415a1796da338b2db
URL: https://github.com/llvm/llvm-project/commit/dd4bf22b9380e797362fac1415a1796da338b2db
DIFF: https://github.com/llvm/llvm-project/commit/dd4bf22b9380e797362fac1415a1796da338b2db.diff
LOG: [X86] combineBlendOfPermutes - don't introduce lane-crossing permutes without AVX2 support.
Fixes #91433
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8ec4984dfa557..3ae68c438aa7f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40078,10 +40078,10 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
// Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
// iff we don't demand the same element index for both X and Y.
-static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1,
- ArrayRef<int> BlendMask,
- const APInt &DemandedElts,
- SelectionDAG &DAG, const SDLoc &DL) {
+static SDValue
+combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask,
+ const APInt &DemandedElts, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget, const SDLoc &DL) {
assert(isBlendOrUndef(BlendMask) && "Blend shuffle expected");
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
@@ -40156,6 +40156,11 @@ static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1,
return SDValue();
}
+ // Don't introduce lane-crossing permutes without AVX2.
+ if (VT.is256BitVector() && !Subtarget.hasAVX2() &&
+ isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), NewPermuteMask))
+ return SDValue();
+
SDValue NewBlend =
DAG.getVectorShuffle(VT, DL, DAG.getBitcast(VT, Ops0[0]),
DAG.getBitcast(VT, Ops1[0]), NewBlendMask);
@@ -41918,9 +41923,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::BLENDI: {
SmallVector<int, 16> BlendMask;
DecodeBLENDMask(NumElts, Op.getConstantOperandVal(2), BlendMask);
- if (SDValue R = combineBlendOfPermutes(VT.getSimpleVT(), Op.getOperand(0),
- Op.getOperand(1), BlendMask,
- DemandedElts, TLO.DAG, SDLoc(Op)))
+ if (SDValue R = combineBlendOfPermutes(
+ VT.getSimpleVT(), Op.getOperand(0), Op.getOperand(1), BlendMask,
+ DemandedElts, TLO.DAG, Subtarget, SDLoc(Op)))
return TLO.CombineTo(Op, R);
break;
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index bc95fd42e6b84..ced9304f4c59c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -699,6 +699,28 @@ define <4 x double> @shuffle_v4f64_0437(<4 x double> %a, <4 x double> %b) {
ret <4 x double> %shuffle
}
+; PR91433
+define <4 x double> @shuffle_v4f64_2303(<4 x double> %a) {
+; AVX1-LABEL: shuffle_v4f64_2303:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_2303:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_2303:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
+; AVX512VL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
+ ret <4 x double> %shuffle
+}
+
define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0z3z:
; ALL: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 81ce14132c879..0c65f756f296f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -308,8 +308,9 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
define <8 x i32> @combine_blend_of_permutes_v8i32(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: combine_blend_of_permutes_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7]
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: combine_blend_of_permutes_v8i32:
More information about the llvm-commits mailing list