[llvm] 4f57562 - llvm-reduce: Try to kill parallel workitems once we have a result.

Wed Jan 11 05:58:28 PST 2023

Author: Matt Arsenault
Date: 2023-01-11T08:58:22-05:00
New Revision: 4f575620d51032cf98424c9defafe4dfc8d66f45

URL: https://github.com/llvm/llvm-project/commit/4f575620d51032cf98424c9defafe4dfc8d66f45
DIFF: https://github.com/llvm/llvm-project/commit/4f575620d51032cf98424c9defafe4dfc8d66f45.diff

LOG: llvm-reduce: Try to kill parallel workitems once we have a result.

The current parallel reduction logic tries to reproduce what a serial
reduction would produce, and just takes the first result that is still
interesting. We still have to wait for all the other workitems to
complete though, which at that point is just a waste.

This helps speed things up with long-running reducers, which I
frequently have. For example, the added sleep test took about 8 seconds
on my system before this change and about 4 seconds after.

https://reviews.llvm.org/D138953

Added: 
    llvm/test/tools/llvm-reduce/Inputs/sleep.py
    llvm/test/tools/llvm-reduce/parallel-workitem-kill.ll

Modified: 
    llvm/tools/llvm-reduce/TestRunner.cpp
    llvm/tools/llvm-reduce/TestRunner.h
    llvm/tools/llvm-reduce/deltas/Delta.cpp
    llvm/tools/llvm-reduce/llvm-reduce.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-reduce/Inputs/sleep.py b/llvm/test/tools/llvm-reduce/Inputs/sleep.py
new file mode 100755
index 0000000000000..8fd230ab74eb6

--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/Inputs/sleep.py
@@ -0,0 +1,8 @@
+#!/bin/python
+
+import time
+import sys
+
+sleep_seconds = int(sys.argv[1])
+time.sleep(sleep_seconds)
+

diff  --git a/llvm/test/tools/llvm-reduce/parallel-workitem-kill.ll b/llvm/test/tools/llvm-reduce/parallel-workitem-kill.ll
new file mode 100644
index 0000000000000..8003548c7655e
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/parallel-workitem-kill.ll
@@ -0,0 +1,52 @@
+; REQUIRES: thread_support
+; RUN: llvm-reduce --process-poll-interval=1 -j 4 %s -o %t --delta-passes=instructions --test %python --test-arg %S/Inputs/sleep.py --test-arg 2
+; RUN: FileCheck %s < %t
+
+; CHECK: define void @foo
+; CHECK-NEXT: ret void
+
+define void @foo(ptr %ptr) {
+  store i32 0, ptr %ptr
+  store i32 1, ptr %ptr
+  store i32 2, ptr %ptr
+  store i32 3, ptr %ptr
+  store i32 4, ptr %ptr
+  store i32 5, ptr %ptr
+  store i32 6, ptr %ptr
+  store i32 7, ptr %ptr
+  store i32 8, ptr %ptr
+  store i32 9, ptr %ptr
+  store i32 10, ptr %ptr
+  store i32 11, ptr %ptr
+  store i32 12, ptr %ptr
+  store i32 13, ptr %ptr
+  store i32 14, ptr %ptr
+  store i32 15, ptr %ptr
+  store i32 16, ptr %ptr
+  store i32 17, ptr %ptr
+  store i32 18, ptr %ptr
+  store i32 19, ptr %ptr
+  store i32 20, ptr %ptr
+  store i32 21, ptr %ptr
+  store i32 22, ptr %ptr
+  store i32 23, ptr %ptr
+  store i32 24, ptr %ptr
+  store i32 25, ptr %ptr
+  store i32 26, ptr %ptr
+  store i32 27, ptr %ptr
+  store i32 28, ptr %ptr
+  store i32 29, ptr %ptr
+  store i32 30, ptr %ptr
+  store i32 31, ptr %ptr
+  store i32 32, ptr %ptr
+  store i32 33, ptr %ptr
+  store i32 34, ptr %ptr
+  store i32 35, ptr %ptr
+  store i32 36, ptr %ptr
+  store i32 37, ptr %ptr
+  store i32 38, ptr %ptr
+  store i32 39, ptr %ptr
+  store i32 40, ptr %ptr
+  ret void
+}
+

diff  --git a/llvm/tools/llvm-reduce/TestRunner.cpp b/llvm/tools/llvm-reduce/TestRunner.cpp
index 3a5483cdae1c2..1ec761a968f08 100644
--- a/llvm/tools/llvm-reduce/TestRunner.cpp
+++ b/llvm/tools/llvm-reduce/TestRunner.cpp
@@ -18,6 +18,12 @@
 
 using namespace llvm;
 
+extern cl::OptionCategory LLVMReduceOptions;
+static cl::opt<unsigned> PollInterval("process-poll-interval",
+                                      cl::desc("child process wait polling"),
+                                      cl::init(5), cl::Hidden,
+                                      cl::cat(LLVMReduceOptions));
+
 TestRunner::TestRunner(StringRef TestName,
                        const std::vector<std::string> &TestArgs,
                        std::unique_ptr<ReducerWorkItem> Program,
@@ -37,7 +43,7 @@ static constexpr std::array<std::optional<StringRef>, 3> NullRedirects;
 
 /// Runs the interestingness test, passes file to be tested as first argument
 /// and other specified test arguments after that.
-int TestRunner::run(StringRef Filename) const {
+int TestRunner::run(StringRef Filename, const std::atomic<bool> &Killed) const {
   std::vector<StringRef> ProgramArgs;
   ProgramArgs.push_back(TestName);
 
@@ -47,13 +53,13 @@ int TestRunner::run(StringRef Filename) const {
   ProgramArgs.push_back(Filename);
 
   std::string ErrMsg;
+  bool ExecutionFailed;
+  sys::ProcessInfo PI =
+      sys::ExecuteNoWait(TestName, ProgramArgs, /*Env=*/std::nullopt,
+                         Verbose ? DefaultRedirects : NullRedirects,
+                         /*MemoryLimit=*/0, &ErrMsg, &ExecutionFailed);
 
-  int Result =
-      sys::ExecuteAndWait(TestName, ProgramArgs, /*Env=*/std::nullopt,
-                          Verbose ? DefaultRedirects : NullRedirects,
-                          /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
-
-  if (Result < 0) {
+  if (ExecutionFailed) {
     Error E = make_error<StringError>("Error running interesting-ness test: " +
                                           ErrMsg,
                                       inconvertibleErrorCode());
@@ -61,7 +67,25 @@ int TestRunner::run(StringRef Filename) const {
     exit(1);
   }
 
-  return !Result;
+  // Poll every few seconds, taking a break to check if we should try to kill
+  // the process. We're trying to early exit on long running parallel reductions
+  // once we know they don't matter.
+  std::optional<unsigned> SecondsToWait(PollInterval);
+  bool Polling = true;
+  sys::ProcessInfo WaitPI;
+
+  while (WaitPI.Pid == 0) { // Process has not changed state.
+    WaitPI = sys::Wait(PI, SecondsToWait, &ErrMsg, nullptr, Polling);
+    // TODO: This should probably be std::atomic_flag
+    if (Killed) {
+      // The current Program API does not have a way to directly kill, but we
+      // can timeout after 0 seconds.
+      SecondsToWait = 0;
+      Polling = false;
+    }
+  }
+
+  return !WaitPI.ReturnCode;
 }
 
 void TestRunner::setProgram(std::unique_ptr<ReducerWorkItem> P) {

diff  --git a/llvm/tools/llvm-reduce/TestRunner.h b/llvm/tools/llvm-reduce/TestRunner.h
index 128eede0feeae..7af1096ff0a6b 100644
--- a/llvm/tools/llvm-reduce/TestRunner.h
+++ b/llvm/tools/llvm-reduce/TestRunner.h
@@ -33,7 +33,7 @@ class TestRunner {
 
   /// Runs the interesting-ness test for the specified file
   /// @returns 0 if test was successful, 1 if otherwise
-  int run(StringRef Filename) const;
+  int run(StringRef Filename, const std::atomic<bool> &Killed) const;
 
   /// Returns the most reduced version of the original testcase
   ReducerWorkItem &getProgram() const { return *Program; }

diff  --git a/llvm/tools/llvm-reduce/deltas/Delta.cpp b/llvm/tools/llvm-reduce/deltas/Delta.cpp
index 0def68c5f11c4..689d4fc6855d0 100644
--- a/llvm/tools/llvm-reduce/deltas/Delta.cpp
+++ b/llvm/tools/llvm-reduce/deltas/Delta.cpp
@@ -65,7 +65,8 @@ void writeBitcode(ReducerWorkItem &M, raw_ostream &OutStream);
 void readBitcode(ReducerWorkItem &M, MemoryBufferRef Data, LLVMContext &Ctx,
                  StringRef ToolName);
 
-bool isReduced(ReducerWorkItem &M, const TestRunner &Test) {
+bool isReduced(ReducerWorkItem &M, const TestRunner &Test,
+               const std::atomic<bool> &Killed) {
   const bool UseBitcode = Test.inputIsBitcode() || TmpFilesAsBitcode;
 
   SmallString<128> CurrentFilepath;
@@ -96,7 +97,7 @@ bool isReduced(ReducerWorkItem &M, const TestRunner &Test) {
   }
 
   // Current Chunks aren't interesting
-  return Test.run(CurrentFilepath);
+  return Test.run(CurrentFilepath, Killed);
 }
 
 /// Splits Chunks in half and prints them.
@@ -138,7 +139,8 @@ CheckChunk(const Chunk &ChunkToCheckForUninterestingness,
            std::unique_ptr<ReducerWorkItem> Clone, const TestRunner &Test,
            ReductionFunc ExtractChunksFromModule,
            const DenseSet<Chunk> &UninterestingChunks,
-           const std::vector<Chunk> &ChunksStillConsideredInteresting) {
+           const std::vector<Chunk> &ChunksStillConsideredInteresting,
+           const std::atomic<bool> &Killed) {
   // Take all of ChunksStillConsideredInteresting chunks, except those we've
   // already deemed uninteresting (UninterestingChunks) but didn't remove
   // from ChunksStillConsideredInteresting yet, and additionally ignore
@@ -178,7 +180,7 @@ CheckChunk(const Chunk &ChunkToCheckForUninterestingness,
     errs() << "\n";
   }
 
-  if (!isReduced(*Clone, Test)) {
+  if (!isReduced(*Clone, Test, Killed)) {
     // Program became non-reduced, so this chunk appears to be interesting.
     if (Verbose)
       errs() << "\n";
@@ -191,7 +193,8 @@ static SmallString<0> ProcessChunkFromSerializedBitcode(
     Chunk &ChunkToCheckForUninterestingness, TestRunner &Test,
     ReductionFunc ExtractChunksFromModule, DenseSet<Chunk> &UninterestingChunks,
     std::vector<Chunk> &ChunksStillConsideredInteresting,
-    SmallString<0> &OriginalBC, std::atomic<bool> &AnyReduced) {
+    SmallString<0> &OriginalBC, std::atomic<bool> &AnyReduced,
+    const std::atomic<bool> &Killed) {
   LLVMContext Ctx;
   auto CloneMMM = std::make_unique<ReducerWorkItem>();
   MemoryBufferRef Data(StringRef(OriginalBC), "<bc file>");
@@ -201,7 +204,7 @@ static SmallString<0> ProcessChunkFromSerializedBitcode(
   if (std::unique_ptr<ReducerWorkItem> ChunkResult =
           CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM),
                      Test, ExtractChunksFromModule, UninterestingChunks,
-                     ChunksStillConsideredInteresting)) {
+                     ChunksStillConsideredInteresting, Killed)) {
     raw_svector_ostream BCOS(Result);
     writeBitcode(*ChunkResult, BCOS);
     // Communicate that the task reduced a chunk.
@@ -242,7 +245,7 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule,
 
     assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
            "input module is broken after counting chunks");
-    assert(isReduced(Test.getProgram(), Test) &&
+    assert(isReduced(Test.getProgram(), Test, std::atomic<bool>()) &&
            "input module no longer interesting after counting chunks");
 
 #ifndef NDEBUG
@@ -290,6 +293,11 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule,
       writeBitcode(Test.getProgram(), BCOS);
     }
 
+    // If doing parallel reduction, signal to running workitem threads that we
+    // no longer care about their results. They should try to kill the reducer
+    // workitem process and exit.
+    std::atomic<bool> Killed = false;
+
     SharedTaskQueue TaskQueue;
     for (auto I = ChunksStillConsideredInteresting.rbegin(),
               E = ChunksStillConsideredInteresting.rend();
@@ -316,11 +324,12 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule,
         for (unsigned J = 0; J < NumInitialTasks; ++J) {
           TaskQueue.emplace_back(ChunkThreadPool.async(
               [J, I, &Test, &ExtractChunksFromModule, &UninterestingChunks,
-               &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced]() {
+               &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced,
+               &Killed]() {
                 return ProcessChunkFromSerializedBitcode(
                     *(I + J), Test, ExtractChunksFromModule,
                     UninterestingChunks, ChunksStillConsideredInteresting,
-                    OriginalBC, AnyReduced);
+                    OriginalBC, AnyReduced, Killed);
               }));
         }
 
@@ -344,11 +353,11 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule,
               TaskQueue.emplace_back(ChunkThreadPool.async(
                   [&Test, &ExtractChunksFromModule, &UninterestingChunks,
                    &ChunksStillConsideredInteresting, &OriginalBC,
-                   &ChunkToCheck, &AnyReduced]() {
+                   &ChunkToCheck, &AnyReduced, &Killed]() {
                     return ProcessChunkFromSerializedBitcode(
                         ChunkToCheck, Test, ExtractChunksFromModule,
                         UninterestingChunks, ChunksStillConsideredInteresting,
-                        OriginalBC, AnyReduced);
+                        OriginalBC, AnyReduced, Killed);
                   }));
             }
             continue;
@@ -361,6 +370,8 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule,
           break;
         }
 
+        Killed = true;
+
         // If we broke out of the loop, we still need to wait for everything to
         // avoid race access to the chunk set.
         //
@@ -375,7 +386,7 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule,
             *I,
             cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine()),
             Test, ExtractChunksFromModule, UninterestingChunks,
-            ChunksStillConsideredInteresting);
+            ChunksStillConsideredInteresting, Killed);
       }
 
       if (!Result)

diff  --git a/llvm/tools/llvm-reduce/llvm-reduce.cpp b/llvm/tools/llvm-reduce/llvm-reduce.cpp
index 07a04a6ccd8fa..8f064bc1e1199 100644
--- a/llvm/tools/llvm-reduce/llvm-reduce.cpp
+++ b/llvm/tools/llvm-reduce/llvm-reduce.cpp
@@ -101,7 +101,8 @@ static cl::opt<int>
 
 static codegen::RegisterCodeGenFlags CGF;
 
-bool isReduced(ReducerWorkItem &M, const TestRunner &Test);
+bool isReduced(ReducerWorkItem &M, const TestRunner &Test,
+               const std::atomic<bool> &Killed);
 
 /// Turn off crash debugging features
 ///
@@ -217,7 +218,7 @@ int main(int Argc, char **Argv) {
   // test, rather than evaluating the source IR directly. This is for the
   // convenience of lit tests; the stripped out comments may have broken the
   // interestingness checks.
-  if (!isReduced(Tester.getProgram(), Tester)) {
+  if (!isReduced(Tester.getProgram(), Tester, std::atomic<bool>())) {
     errs() << "\nInput isn't interesting! Verify interesting-ness test\n";
     return 1;
   }