[llvm] 058ac83 - [SLP]Use generic createShuffle for buildvector

Mon Nov 11 10:59:22 PST 2024

Author: Alexey Bataev
Date: 2024-11-11T10:49:39-08:00
New Revision: 058ac837bc35419bbbb34f3206f5aa229c669811

URL: https://github.com/llvm/llvm-project/commit/058ac837bc35419bbbb34f3206f5aa229c669811
DIFF: https://github.com/llvm/llvm-project/commit/058ac837bc35419bbbb34f3206f5aa229c669811.diff

LOG: [SLP]Use generic createShuffle for buildvector

Use the generic createShuffle function, which knows how to adjust the vectors
correctly, to avoid a compiler crash when trying to build a buildvector as
a shuffle.

Fixes #115732

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle-with-root.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index da8e0d8cc09a8b..1bf082d57b8bb0 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3045,7 +3045,9 @@ class BoUpSLP {
 
   /// \returns a vector from a collection of scalars in \p VL. if \p Root is not
   /// specified, the starting vector value is poison.
-  Value *gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy);
+  Value *
+  gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy,
+         function_ref<Value *(Value *, Value *, ArrayRef<int>)> CreateShuffle);
 
   /// \returns whether the VectorizableTree is fully vectorizable and will
   /// be beneficial even the tree height is tiny.
@@ -9167,8 +9169,9 @@ class BaseShuffleAnalysis {
     int VF = Mask.size();
     if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
       VF = FTy->getNumElements();
-    if (V2 &&
-        !isUndefVector(V2, buildUseMask(VF, Mask, UseMask::SecondArg)).all()) {
+    if (V2 && !isUndefVector</*IsPoisonOnly=*/true>(
+                   V2, buildUseMask(VF, Mask, UseMask::SecondArg))
+                   .all()) {
       // Peek through shuffles.
       Value *Op1 = V1;
       Value *Op2 = V2;
@@ -13454,7 +13457,9 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
   Builder.SetCurrentDebugLocation(Front->getDebugLoc());
 }
 
-Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
+Value *BoUpSLP::gather(
+    ArrayRef<Value *> VL, Value *Root, Type *ScalarTy,
+    function_ref<Value *(Value *, Value *, ArrayRef<int>)> CreateShuffle) {
   // List of instructions/lanes from current block and/or the blocks which are
   // part of the current loop. These instructions will be inserted at the end to
   // make it possible to optimize loops and hoist invariant instructions out of
@@ -13560,7 +13565,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
     if (isa<PoisonValue>(Vec)) {
       Vec = OriginalRoot;
     } else {
-      Vec = Builder.CreateShuffleVector(Root, Vec, Mask);
+      Vec = CreateShuffle(Root, Vec, Mask);
       if (auto *OI = dyn_cast<Instruction>(OriginalRoot);
           OI && OI->hasNUses(0))
         eraseInstruction(OI);
@@ -14022,7 +14027,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
   }
   Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
                 Value *Root = nullptr) {
-    return R.gather(VL, Root, ScalarTy);
+    return R.gather(VL, Root, ScalarTy,
+                    [&](Value *V1, Value *V2, ArrayRef<int> Mask) {
+                      return createShuffle(V1, V2, Mask);
+                    });
   }
   Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
   /// Finalize emission of the shuffles.

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle-with-root.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle-with-root.ll
new file mode 100644
index 00000000000000..6374cddc7346ca
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle-with-root.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(i16 %arg) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i16 [[ARG:%.*]]) {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[ARG]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sitofp <2 x i16> [[TMP0]] to <2 x float>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> <float 0.000000e+00, float poison, float poison, float poison>, <4 x i32> <i32 4, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fsub <4 x float> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP8:%.*]] = fsub <4 x float> [[TMP7]], [[TMP2]]
+; CHECK-NEXT:    store <4 x float> [[TMP8]], ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 20), align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %sitofp = sitofp i16 %arg to float
+  %fadd = fadd float 0.000000e+00, 0.000000e+00
+  %fsub = fsub float 0.000000e+00, %sitofp
+  %fsub1 = fsub float 0.000000e+00, %sitofp
+  %fsub2 = fsub float 0.000000e+00, %sitofp
+  %sitofp3 = sitofp i16 0 to float
+  %fsub4 = fsub float %fadd, %sitofp3
+  store float %fsub4, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 20), align 4
+  %fsub5 = fsub float %fsub, %sitofp
+  store float %fsub5, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 24), align 4
+  %fsub6 = fsub float %fsub1, %sitofp
+  store float %fsub6, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 28), align 4
+  %fsub7 = fsub float %fsub2, %sitofp
+  store float %fsub7, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 32), align 4
+  ret void
+}