[llvm] 5636eb8 - [X86] combineBlendOfPermutes - allow whole-lane permutation on AVX1 targets.

Wed May 8 09:06:23 PDT 2024

Author: Simon Pilgrim
Date: 2024-05-08T17:05:45+01:00
New Revision: 5636eb89bd69f9c55f4e4aeafaa8c04aa99e5c84

URL: https://github.com/llvm/llvm-project/commit/5636eb89bd69f9c55f4e4aeafaa8c04aa99e5c84
DIFF: https://github.com/llvm/llvm-project/commit/5636eb89bd69f9c55f4e4aeafaa8c04aa99e5c84.diff

LOG: [X86] combineBlendOfPermutes - allow whole-lane permutation on AVX1 targets.

Commit dd4bf22b9380e797362fac1415a1796da338b2db fixed #91433, but it also prevented us from using vperm2f128 to permute entire 128-bit lanes on AVX1 targets. If the new 256-bit permutation mask can be scaled to 2 x 128-bit elements, then we can still perform the fold.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 126577a4d59e..0410cc33ca33 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40161,9 +40161,12 @@ combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask,
       return SDValue();
   }
 
-  // Don't introduce lane-crossing permutes without AVX2.
+  // Don't introduce lane-crossing permutes without AVX2, unless it can be
+  // widened to a lane permute (vperm2f128).
   if (VT.is256BitVector() && !Subtarget.hasAVX2() &&
-      isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), NewPermuteMask))
+      isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(),
+                                NewPermuteMask) &&
+      !canScaleShuffleElements(NewPermuteMask, 2))
     return SDValue();
 
   SDValue NewBlend =

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 0c65f756f296..81ce14132c87 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -308,9 +308,8 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
 define <8 x i32> @combine_blend_of_permutes_v8i32(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX1-LABEL: combine_blend_of_permutes_v8i32:
 ; AVX1:       # %bb.0:
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
-; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7]
 ; AVX1-NEXT:    ret{{[l|q]}}
 ;
 ; AVX2-LABEL: combine_blend_of_permutes_v8i32: