[llvm] 8e382ae - [Support] Simplify parallelForEach{,N}
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 23 10:35:50 PST 2022
Author: Fangrui Song
Date: 2022-01-23T10:35:44-08:00
New Revision: 8e382ae91b97161930a128e56774d6e1242b6514
URL: https://github.com/llvm/llvm-project/commit/8e382ae91b97161930a128e56774d6e1242b6514
DIFF: https://github.com/llvm/llvm-project/commit/8e382ae91b97161930a128e56774d6e1242b6514.diff
LOG: [Support] Simplify parallelForEach{,N}
* Merge parallel_for_each into parallelForEach (this removes one `Fn(...)` call)
* Change parallelForEach to use parallelForEachN
* Move parallelForEachN into Parallel.cpp
My x86-64 `lld` executable is 100KiB smaller.
No noticeable difference in performance.
Reviewed By: lattner
Differential Revision: https://reviews.llvm.org/D117510
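
For context, here is a minimal usage sketch of the two entry points this
patch touches. The `example` function, the `std::vector`, and the callbacks
are illustrative only; the `llvm::parallelForEach`/`llvm::parallelForEachN`
signatures are the ones shown in the header diff below.

  // Hypothetical caller, sketching the post-patch API surface.
  #include "llvm/Support/Parallel.h"
  #include <cstddef>
  #include <vector>

  void example(std::vector<int> &V) {
    // Iterator form: after this patch it forwards to parallelForEachN.
    llvm::parallelForEach(V.begin(), V.end(), [](int &X) { X *= 2; });
    // Index form: after this patch it is a single out-of-line definition
    // in Parallel.cpp rather than a per-callsite template instantiation.
    llvm::parallelForEachN(0, V.size(), [&](size_t I) { V[I] += 1; });
  }

Taking `function_ref` and moving the index form out of line means each
distinct callback type no longer instantiates its own copy of the chunking
loop, which is likely where the 100KiB size reduction comes from.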
Added:
Modified:
llvm/include/llvm/Support/Parallel.h
llvm/lib/Support/Parallel.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 5c3b26d5754c2..04caf5eac961d 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -130,64 +130,6 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
// improving to take the number of available cores into account.)
enum { MaxTasksPerGroup = 1024 };
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
- // If we have zero or one items, then do not incur the overhead of spinning up
- // a task group. They are surprisingly expensive, and because they do not
- // support nested parallelism, a single entry task group can block parallel
- // execution underneath them.
- auto NumItems = std::distance(Begin, End);
- if (NumItems <= 1) {
- if (NumItems)
- Fn(*Begin);
- return;
- }
-
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- ptrdiff_t TaskSize = NumItems / MaxTasksPerGroup;
- if (TaskSize == 0)
- TaskSize = 1;
-
- TaskGroup TG;
- while (TaskSize < std::distance(Begin, End)) {
- TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); });
- Begin += TaskSize;
- }
- std::for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
- // If we have zero or one items, then do not incur the overhead of spinning up
- // a task group. They are surprisingly expensive, and because they do not
- // support nested parallelism, a single entry task group can block parallel
- // execution underneath them.
- auto NumItems = End - Begin;
- if (NumItems <= 1) {
- if (NumItems)
- Fn(Begin);
- return;
- }
-
- // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
- // overhead on large inputs.
- ptrdiff_t TaskSize = NumItems / MaxTasksPerGroup;
- if (TaskSize == 0)
- TaskSize = 1;
-
- TaskGroup TG;
- IndexTy I = Begin;
- for (; I + TaskSize < End; I += TaskSize) {
- TG.spawn([=, &Fn] {
- for (IndexTy J = I, E = I + TaskSize; J != E; ++J)
- Fn(J);
- });
- }
- for (IndexTy J = I; J < End; ++J)
- Fn(J);
-}
-
template <class IterTy, class ResultTy, class ReduceFuncTy,
class TransformFuncTy>
ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init,
@@ -251,27 +193,11 @@ void parallelSort(RandomAccessIterator Start, RandomAccessIterator End,
llvm::sort(Start, End, Comp);
}
+void parallelForEachN(size_t Begin, size_t End, function_ref<void(size_t)> Fn);
+
template <class IterTy, class FuncTy>
void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
-#if LLVM_ENABLE_THREADS
- if (parallel::strategy.ThreadsRequested != 1) {
- parallel::detail::parallel_for_each(Begin, End, Fn);
- return;
- }
-#endif
- std::for_each(Begin, End, Fn);
-}
-
-template <class FuncTy>
-void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) {
-#if LLVM_ENABLE_THREADS
- if (parallel::strategy.ThreadsRequested != 1) {
- parallel::detail::parallel_for_each_n(Begin, End, Fn);
- return;
- }
-#endif
- for (size_t I = Begin; I != End; ++I)
- Fn(I);
+ parallelForEachN(0, End - Begin, [&](size_t I) { Fn(Begin[I]); });
}
template <class IterTy, class ResultTy, class ReduceFuncTy,
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 71e3a1362f7eb..4977c188f934f 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -174,3 +174,35 @@ void TaskGroup::spawn(std::function<void()> F) {
} // namespace parallel
} // namespace llvm
#endif // LLVM_ENABLE_THREADS
+
+void llvm::parallelForEachN(size_t Begin, size_t End,
+ llvm::function_ref<void(size_t)> Fn) {
+ // If we have zero or one items, then do not incur the overhead of spinning up
+ // a task group. They are surprisingly expensive, and because they do not
+ // support nested parallelism, a single entry task group can block parallel
+ // execution underneath them.
+#if LLVM_ENABLE_THREADS
+ auto NumItems = End - Begin;
+ if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) {
+ // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+ // overhead on large inputs.
+ auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup;
+ if (TaskSize == 0)
+ TaskSize = 1;
+
+ parallel::detail::TaskGroup TG;
+ for (; Begin + TaskSize < End; Begin += TaskSize) {
+ TG.spawn([=, &Fn] {
+ for (size_t I = Begin, E = Begin + TaskSize; I != E; ++I)
+ Fn(I);
+ });
+ }
+ for (; Begin != End; ++Begin)
+ Fn(Begin);
+ return;
+ }
+#endif
+
+ for (; Begin != End; ++Begin)
+ Fn(Begin);
+}
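
The out-of-line definition keeps the old chunking scheme: the range is split
into at most MaxTasksPerGroup (1024) tasks to bound scheduling overhead, and
the final chunk runs inline on the spawning thread. Below is a self-contained
sketch of that partitioning, using plain std::thread in place of LLVM's
parallel::detail::TaskGroup; the function name and executor choice are
assumptions for illustration, not the patch's code.

  #include <algorithm>
  #include <cstddef>
  #include <functional>
  #include <thread>
  #include <vector>

  static constexpr size_t MaxTasksPerGroup = 1024;

  // Illustrative stand-in for the chunked loop in llvm::parallelForEachN.
  void forEachIndexChunked(size_t Begin, size_t End,
                           const std::function<void(size_t)> &Fn) {
    size_t NumItems = End - Begin;
    // Zero or one items: skip the cost of spinning up workers entirely.
    if (NumItems <= 1) {
      if (NumItems)
        Fn(Begin);
      return;
    }
    // Cap the task count; each task covers TaskSize consecutive indices.
    size_t TaskSize = std::max<size_t>(NumItems / MaxTasksPerGroup, 1);
    std::vector<std::thread> Workers;
    for (; Begin + TaskSize < End; Begin += TaskSize)
      Workers.emplace_back([=, &Fn] {
        for (size_t I = Begin, E = Begin + TaskSize; I != E; ++I)
          Fn(I);
      });
    // Run the final (possibly short) chunk inline, as the patch does.
    for (size_t I = Begin; I != End; ++I)
      Fn(I);
    for (std::thread &T : Workers)
      T.join();
  }

Note that the real implementation reuses a shared thread pool through
TaskGroup, so spawning up to 1024 tasks is cheap; the raw-thread version
above is only meant to make the partitioning arithmetic visible.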