[llvm] a893b44 - [SLP]Disable scheduling of insertelements.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 10 10:26:41 PDT 2021


Author: Alexey Bataev
Date: 2021-06-10T10:25:26-07:00
New Revision: a893b441873d4dd0ba849a722797f40da8600c39

URL: https://github.com/llvm/llvm-project/commit/a893b441873d4dd0ba849a722797f40da8600c39
DIFF: https://github.com/llvm/llvm-project/commit/a893b441873d4dd0ba849a722797f40da8600c39.diff

LOG: [SLP]Disable scheduling of insertelements.

There is no need to schedule insertelement instructions. The compiler
did not schedule them before it started support their vectorization and
it should not do it after. We pre-schedule them manually when finding
a build vector sequence.
Disabling scheduling of insertelement instructions improves compile
time and vectorization of the very large basic blocks by saving
scheduling budget for other instructions.

Differential Revision: https://reviews.llvm.org/D104026

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ffa91575fb9e2..1c8bdffee400b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2244,7 +2244,6 @@ class BoUpSLP {
           auto *In = TE->getMainOp();
           assert(In &&
                  (isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
-                  isa<InsertElementInst>(In) ||
                   In->getNumOperands() == TE->getNumOperands()) &&
                  "Missed TreeEntry operands?");
           (void)In; // fake use to avoid build failure when assertions disabled
@@ -5886,7 +5885,7 @@ void BoUpSLP::optimizeGatherSequence() {
 Optional<BoUpSLP::ScheduleData *>
 BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
                                             const InstructionsState &S) {
-  if (isa<PHINode>(S.OpValue))
+  if (isa<PHINode>(S.OpValue) || isa<InsertElementInst>(S.OpValue))
     return nullptr;
 
   // Initialize the instruction bundle.
@@ -5982,7 +5981,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
 
 void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
                                                 Value *OpValue) {
-  if (isa<PHINode>(OpValue))
+  if (isa<PHINode>(OpValue) || isa<InsertElementInst>(OpValue))
     return;
 
   ScheduleData *Bundle = getScheduleData(OpValue);
@@ -6022,7 +6021,8 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
     return true;
   Instruction *I = dyn_cast<Instruction>(V);
   assert(I && "bundle member must be an instruction");
-  assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
+  assert(!isa<PHINode>(I) && !isa<InsertElementInst>(I) &&
+         "phi nodes/insertelements don't need to be scheduled");
   auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
     ScheduleData *ISD = getScheduleData(I);
     if (!ISD)
@@ -6165,10 +6165,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
           for (User *U : BundleMember->Inst->users()) {
             if (isa<Instruction>(U)) {
               ScheduleData *UseSD = getScheduleData(U);
-              if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle) &&
-                  // Ignore inner deps for insertelement
-                  !(UseSD->FirstInBundle == SD &&
-                    isa<InsertElementInst>(BundleMember->Inst))) {
+              if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
                 BundleMember->Dependencies++;
                 ScheduleData *DestBundle = UseSD->FirstInBundle;
                 if (!DestBundle->IsScheduled)
@@ -6295,8 +6292,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
   for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
        I = I->getNextNode()) {
     BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
-      assert(SD->isPartOfBundle() ==
-                 (getTreeEntry(SD->Inst) != nullptr) &&
+      assert((isa<InsertElementInst>(SD->Inst) ||
+              SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
              "scheduler and vectorizer bundle mismatch");
       SD->FirstInBundle->SchedulingPriority = Idx++;
       if (SD->isSchedulingEntity()) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
index 64b8c7c53f316..9f0fa6a73363d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
@@ -448,14 +448,14 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
 ; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
 ; MINTREESIZE-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
 ; MINTREESIZE-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
+; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1
+; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
+; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1
+; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
+; MINTREESIZE-NEXT:    [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    ret <4 x float> [[TMP11]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
   %b0 = extractelement <4 x float> %b, i32 0

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 800669f4097c7..1b9d54dd4d4f3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -483,14 +483,14 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
 ; MINTREESIZE-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0
 ; MINTREESIZE-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0
 ; MINTREESIZE-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1
-; MINTREESIZE-NEXT:    [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
-; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1
-; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
-; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1
-; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
-; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT:    [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT:    ret <4 x float> [[TMP17]]
+; MINTREESIZE-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
+; MINTREESIZE-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1
+; MINTREESIZE-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0
+; MINTREESIZE-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1
+; MINTREESIZE-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
+; MINTREESIZE-NEXT:    [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT:    ret <4 x float> [[TMP11]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
   %b0 = extractelement <4 x float> %b, i32 0


        


More information about the llvm-commits mailing list