[llvm] ac3f967 - [X86] canonicalizeShuffleWithBinOps - merge shuffles across binops if either source op is a known splat

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 18 09:14:23 PDT 2022


Author: Simon Pilgrim
Date: 2022-06-18T17:14:00+01:00
New Revision: ac3f967382abb63b402eb8a038152d9a17145311

URL: https://github.com/llvm/llvm-project/commit/ac3f967382abb63b402eb8a038152d9a17145311
DIFF: https://github.com/llvm/llvm-project/commit/ac3f967382abb63b402eb8a038152d9a17145311.diff

LOG: [X86] canonicalizeShuffleWithBinOps - merge shuffles across binops if either source op is a known splat

The shuffle of a splat (with no undefs) should always be removed

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
    llvm/test/CodeGen/X86/sar_fold64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e1298ed02b80..1b4c9e173468 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39363,17 +39363,18 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT ShuffleVT = N.getValueType();
 
-  auto IsMergeableWithShuffle = [](SDValue Op, bool FoldLoad = false) {
+  auto IsMergeableWithShuffle = [&DAG](SDValue Op, bool FoldLoad = false) {
     // AllZeros/AllOnes constants are freely shuffled and will peek through
     // bitcasts. Other constant build vectors do not peek through bitcasts. Only
     // merge with target shuffles if it has one use so shuffle combining is
-    // likely to kick in.
+    // likely to kick in. Shuffles of splats are expected to be removed.
     return ISD::isBuildVectorAllOnes(Op.getNode()) ||
            ISD::isBuildVectorAllZeros(Op.getNode()) ||
            ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
            ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
            (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
-           (FoldLoad && isShuffleFoldableLoad(Op));
+           (FoldLoad && isShuffleFoldableLoad(Op)) ||
+           DAG.isSplatValue(Op, /*AllowUndefs*/ false);
   };
   auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
     // Ensure we only shuffle whole vector src elements, unless its a logical

diff  --git a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
index 85266a7a682c..0519130fdec9 100644
--- a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
+++ b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -215,9 +215,8 @@ define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
 ; KNL-LABEL: fsub_noundef_ee:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vextractf32x4 $2, %zmm1, %xmm0
-; KNL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; KNL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0
-; KNL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; KNL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; KNL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: fsub_noundef_ee:

diff  --git a/llvm/test/CodeGen/X86/sar_fold64.ll b/llvm/test/CodeGen/X86/sar_fold64.ll
index 17d4d9f5c215..a23d0cb4962f 100644
--- a/llvm/test/CodeGen/X86/sar_fold64.ll
+++ b/llvm/test/CodeGen/X86/sar_fold64.ll
@@ -117,9 +117,9 @@ define <4 x i32> @all_sign_bit_ashr_vec1(<4 x i32> %x) {
 ;
 ; AVX2-LABEL: all_sign_bit_ashr_vec1:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
@@ -177,9 +177,9 @@ define <4 x i32> @all_sign_bit_ashr_vec3(<4 x i32> %x) {
 ;
 ; AVX2-LABEL: all_sign_bit_ashr_vec3:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq


        


More information about the llvm-commits mailing list