[llvm] 71dfdbe - [X86] getFauxShuffleMask - handle insert_subvector(zero, sub, C)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 7 03:14:21 PDT 2020


Author: Simon Pilgrim
Date: 2020-09-07T11:10:40+01:00
New Revision: 71dfdbe2c73afcc319bfd96c9e73407ea9245e3a

URL: https://github.com/llvm/llvm-project/commit/71dfdbe2c73afcc319bfd96c9e73407ea9245e3a
DIFF: https://github.com/llvm/llvm-project/commit/71dfdbe2c73afcc319bfd96c9e73407ea9245e3a.diff

LOG: [X86] getFauxShuffleMask - handle insert_subvector(zero, sub, C)

Directly use SM_SentinelZero mask elements when we're inserting (i.e. widening) a subvector into an all-zeros vector.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 99d35f0c91ff..09855fd0eb92 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7452,8 +7452,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
     }
     Ops.push_back(Src);
     Ops.append(SubInputs.begin(), SubInputs.end());
-    for (int i = 0; i != (int)NumElts; ++i)
-      Mask.push_back(i);
+    if (ISD::isBuildVectorAllZeros(Src.getNode()))
+      Mask.append(NumElts, SM_SentinelZero);
+    else
+      for (int i = 0; i != (int)NumElts; ++i)
+        Mask.push_back(i);
     for (int i = 0; i != (int)NumSubElts; ++i) {
       int M = SubMask[i];
       if (0 <= M) {

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index d4ef76a2a9cf..e744dbd10336 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -156,8 +156,7 @@ define <4 x double> @combine_vperm2f128_vpermilvar_as_vperm2f128(<4 x double> %a
 define <4 x double> @combine_vperm2f128_vpermilvar_as_vmovaps(<4 x double> %a0) {
 ; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vmovaps:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; CHECK-NEXT:    vmovaps %xmm0, %xmm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
   %2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>


        


More information about the llvm-commits mailing list