[llvm] b3ced98 - [SLP]Fix crash on extending scheduling region.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 18 06:12:24 PDT 2021


Author: Alexey Bataev
Date: 2021-03-18T06:11:08-07:00
New Revision: b3ced9852c7e6cc2dab61b6adb5c92812c99b00e

URL: https://github.com/llvm/llvm-project/commit/b3ced9852c7e6cc2dab61b6adb5c92812c99b00e
DIFF: https://github.com/llvm/llvm-project/commit/b3ced9852c7e6cc2dab61b6adb5c92812c99b00e.diff

LOG: [SLP]Fix crash on extending scheduling region.

If SLP vectorizer tries to extend the scheduling region and runs out of
the budget too early, but still extends the region to the new ending
instructions (i.e., it was able to extend the region for the first
instruction in the bundle, but not for the second), the compiler need to
recalculate dependecies in full, just like if the extending was
successfull. Without it, the schedule data chunks may end up with the
wrong number of (unscheduled) dependecies and it may end up with the
incorrect function, where the vectorized instruction does not dominate
on the extractelement instruction.

Differential Revision: https://reviews.llvm.org/D98531

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7c10abe09fdf..0ec802799c22 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5179,11 +5179,53 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
   bool ReSchedule = false;
   LLVM_DEBUG(dbgs() << "SLP:  bundle: " << *S.OpValue << "\n");
 
+  auto &&TryScheduleBundle = [this, OldScheduleEnd, SLP](bool ReSchedule,
+                                                         ScheduleData *Bundle) {
+    // The scheduling region got new instructions at the lower end (or it is a
+    // new region for the first bundle). This makes it necessary to
+    // recalculate all dependencies.
+    // It is seldom that this needs to be done a second time after adding the
+    // initial bundle to the region.
+    if (ScheduleEnd != OldScheduleEnd) {
+      for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode())
+        doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
+      ReSchedule = true;
+    }
+    if (ReSchedule) {
+      resetSchedule();
+      initialFillReadyList(ReadyInsts);
+    }
+    if (Bundle) {
+      LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
+                        << " in block " << BB->getName() << "\n");
+      calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
+    }
+
+    // Now try to schedule the new bundle or (if no bundle) just calculate
+    // dependencies. As soon as the bundle is "ready" it means that there are no
+    // cyclic dependencies and we can schedule it. Note that's important that we
+    // don't "schedule" the bundle yet (see cancelScheduling).
+    while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
+           !ReadyInsts.empty()) {
+      ScheduleData *Picked = ReadyInsts.pop_back_val();
+      if (Picked->isSchedulingEntity() && Picked->isReady())
+        schedule(Picked, ReadyInsts);
+    }
+  };
+
   // Make sure that the scheduling region contains all
   // instructions of the bundle.
   for (Value *V : VL) {
-    if (!extendSchedulingRegion(V, S))
+    if (!extendSchedulingRegion(V, S)) {
+      // If the scheduling region got new instructions at the lower end (or it
+      // is a new region for the first bundle). This makes it necessary to
+      // recalculate all dependencies.
+      // Otherwise the compiler may crash trying to incorrectly calculate
+      // dependencies and emit instruction in the wrong order at the actual
+      // scheduling.
+      TryScheduleBundle(/*ReSchedule=*/false, nullptr);
       return None;
+    }
   }
 
   for (Value *V : VL) {
@@ -5212,42 +5254,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
     BundleMember->FirstInBundle = Bundle;
     PrevInBundle = BundleMember;
   }
-  if (ScheduleEnd != OldScheduleEnd) {
-    // The scheduling region got new instructions at the lower end (or it is a
-    // new region for the first bundle). This makes it necessary to
-    // recalculate all dependencies.
-    // It is seldom that this needs to be done a second time after adding the
-    // initial bundle to the region.
-    for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
-      doForAllOpcodes(I, [](ScheduleData *SD) {
-        SD->clearDependencies();
-      });
-    }
-    ReSchedule = true;
-  }
-  if (ReSchedule) {
-    resetSchedule();
-    initialFillReadyList(ReadyInsts);
-  }
   assert(Bundle && "Failed to find schedule bundle");
-
-  LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block "
-                    << BB->getName() << "\n");
-
-  calculateDependencies(Bundle, true, SLP);
-
-  // Now try to schedule the new bundle. As soon as the bundle is "ready" it
-  // means that there are no cyclic dependencies and we can schedule it.
-  // Note that's important that we don't "schedule" the bundle yet (see
-  // cancelScheduling).
-  while (!Bundle->isReady() && !ReadyInsts.empty()) {
-
-    ScheduleData *pickedSD = ReadyInsts.pop_back_val();
-
-    if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) {
-      schedule(pickedSD, ReadyInsts);
-    }
-  }
+  TryScheduleBundle(ReSchedule, Bundle);
   if (!Bundle->isReady()) {
     cancelScheduling(VL, S.OpValue);
     return None;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
new file mode 100644
index 000000000000..299c2d3642c4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -slp-min-tree-size=2 -slp-threshold=-1000 -slp-max-look-ahead-depth=1 -slp-look-ahead-users-budget=1 -slp-schedule-budget=27 -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @exceed(double %0, double %1) {
+; CHECK-LABEL: @exceed(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
+; CHECK-NEXT:    [[IX:%.*]] = fmul double [[TMP7]], undef
+; CHECK-NEXT:    [[IXX0:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX1:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX2:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX3:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX4:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX5:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IX1:%.*]] = fmul double [[TMP7]], undef
+; CHECK-NEXT:    [[IXX10:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX11:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX12:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX13:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX14:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX15:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX20:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX21:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[IXX22:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
+; CHECK-NEXT:    [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[IXX101:%.*]] = fsub double undef, undef
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double undef, i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP16:%.*]] = fmul fast <2 x double> [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    switch i32 undef, label [[BB1:%.*]] [
+; CHECK-NEXT:    i32 0, label [[BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[LABEL:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x double> [[TMP16]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x double> poison, double [[TMP17]], i32 0
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[TMP16]], i32 1
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP19]], i32 1
+; CHECK-NEXT:    br label [[LABEL]]
+; CHECK:       label:
+; CHECK-NEXT:    [[TMP21:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP20]], [[BB2]] ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i10 = fdiv fast double %0, %1
+  %ix = fmul double %i10, undef
+  %ixx0 = fsub double undef, undef
+  %ixx1 = fsub double undef, undef
+  %ixx2 = fsub double undef, undef
+  %ixx3 = fsub double undef, undef
+  %ixx4 = fsub double undef, undef
+  %ixx5 = fsub double undef, undef
+  %ix1 = fmul double %i10, undef
+  %ixx10 = fsub double undef, undef
+  %ixx11 = fsub double undef, undef
+  %ixx12 = fsub double undef, undef
+  %ixx13 = fsub double undef, undef
+  %ixx14 = fsub double undef, undef
+  %ixx15 = fsub double undef, undef
+  %ixx20 = fsub double undef, undef
+  %ixx21 = fsub double undef, undef
+  %ixx22 = fsub double undef, undef
+  %i11 = fdiv fast double %0, %1
+  %ix2 = fmul double %i11, %i11
+  %tmp1 = fadd fast double %i11, %0
+  %tmp2 = fadd fast double %0, %1
+  %tmp5 = fmul fast double %tmp1, %tmp2
+  %tmp15 = fadd fast double %i10, %1
+  %tmp25 = fadd fast double %0, %1
+  %tmp6 = fmul fast double %tmp15, %tmp25
+  %tmp555 = fmul fast double %i10, undef
+  %ixx101 = fsub double undef, undef
+  %tmp666 = fmul fast double %1, undef
+  switch i32 undef, label %bb1 [
+  i32 0, label %bb2
+  ]
+
+bb1:                                              ; preds = %entry
+  br label %label
+
+bb2:                                              ; preds = %entry
+  br label %label
+
+label:                                            ; preds = %bb2, %bb1
+  %phi1 = phi double [ %tmp5, %bb1 ], [ %tmp555, %bb2 ]
+  %phi2 = phi double [ %tmp6, %bb1 ], [ %tmp666, %bb2 ]
+  ret void
+}


        


More information about the llvm-commits mailing list