[clang] f4cf51c - [clang][CFG] Add support for partitioning CFG into intervals.
Yitzhak Mandelbaum via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 27 10:08:01 PDT 2023
Author: Yitzhak Mandelbaum
Date: 2023-06-27T17:07:54Z
New Revision: f4cf51c99c74f46a490e0ae265da8fba298d800b
URL: https://github.com/llvm/llvm-project/commit/f4cf51c99c74f46a490e0ae265da8fba298d800b
DIFF: https://github.com/llvm/llvm-project/commit/f4cf51c99c74f46a490e0ae265da8fba298d800b.diff
LOG: [clang][CFG] Add support for partitioning CFG into intervals.
Adds support for the classic dataflow algorithm that partitions a flow graph
into distinct intervals. Cf. the Dragon book, pp. 664-666.
A version of this algorithm exists in LLVM (see llvm/Analysis/Interval.h and
related files), but it is specific to LLVM, is a recursive (vs iterative)
algorithm, and uses many layers of abstraction that seem unnecessary for CFG
purposes.
This patch is part 1 of 2. The next patch will generalize the code to work on
intervals, to support computation of the limit flow graph.
Differential Revision: https://reviews.llvm.org/D152263
Added:
clang/include/clang/Analysis/Analyses/IntervalPartition.h
clang/lib/Analysis/IntervalPartition.cpp
clang/unittests/Analysis/IntervalPartitionTest.cpp
Modified:
clang/lib/Analysis/CMakeLists.txt
clang/unittests/Analysis/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/include/clang/Analysis/Analyses/IntervalPartition.h b/clang/include/clang/Analysis/Analyses/IntervalPartition.h
new file mode 100644
index 0000000000000..cc04bab7bf6c4
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/IntervalPartition.h
@@ -0,0 +1,50 @@
+//===- IntervalPartition.h - CFG Partitioning into Intervals -----*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functionality for partitioning a CFG into intervals. The
+// concepts and implementations are based on the presentation in "Compilers" by
+// Aho, Sethi and Ullman (the "dragon book"), pages 664-666.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_INTERVALPARTITION_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_INTERVALPARTITION_H
+
+#include "clang/Analysis/CFG.h"
+#include "llvm/ADT/DenseSet.h"
+#include <vector>
+
+namespace clang {
+
+// An interval is a strongly-connected component of the CFG along with a
+// trailing acyclic structure. The _header_ of the interval is either the CFG
+// entry block or has at least one predecessor outside of the interval. All
+// other blocks in the interval have only predecessors also in the interval.
+struct CFGInterval {
+ CFGInterval(const CFGBlock *Header) : Header(Header), Blocks({Header}) {}
+
+ // The block from which the interval was constructed; also put in `Blocks`.
+ const CFGBlock *Header;
+
+ // The blocks that make up the interval; seeded with `Header` on construction.
+ llvm::SmallDenseSet<const CFGBlock *> Blocks;
+
+ // Successor blocks of the *interval*: blocks outside the interval that are
+ // reachable (in one edge) from within the interval.
+ llvm::SmallDenseSet<const CFGBlock *> Successors;
+};
+// Builds the interval headed by `Header`, assumed to be a block of `Cfg`.
+CFGInterval buildInterval(const CFG &Cfg, const CFGBlock &Header);
+
+// Partitions `Cfg` into intervals and constructs a graph of the intervals,
+// based on the edges between nodes in these intervals.
+std::vector<CFGInterval> partitionIntoIntervals(const CFG &Cfg);
+
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_INTERVALPARTITION_H
diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt
index ea6cb59e8e199..d029926c76e89 100644
--- a/clang/lib/Analysis/CMakeLists.txt
+++ b/clang/lib/Analysis/CMakeLists.txt
@@ -18,6 +18,7 @@ add_clang_library(clangAnalysis
CodeInjector.cpp
Dominators.cpp
ExprMutationAnalyzer.cpp
+ IntervalPartition.cpp
IssueHash.cpp
LiveVariables.cpp
MacroExpansionContext.cpp
diff --git a/clang/lib/Analysis/IntervalPartition.cpp b/clang/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 0000000000000..9d093d8986f78
--- /dev/null
+++ b/clang/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,116 @@
+//===- IntervalPartition.cpp - CFG Partitioning into Intervals --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functionality for partitioning a CFG into intervals.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/IntervalPartition.h"
+#include "clang/Analysis/CFG.h"
+#include "llvm/ADT/BitVector.h"
+#include <queue>
+#include <set>
+#include <vector>
+
+namespace clang {
+
+// Builds the interval headed by `Header`, adding to it every block reachable
+// from `Header` whose predecessors all lie in the interval. `Partitioned` marks
+// blocks already assigned to an interval; it is updated for each block added.
+static CFGInterval buildInterval(llvm::BitVector &Partitioned,
+ const CFGBlock &Header) {
+ CFGInterval Interval(&Header);
+ Partitioned.set(Header.getBlockID());
+
+ // Elements must not be null. Duplicates are prevented using `Workset`, below.
+ std::queue<const CFGBlock *> Worklist;
+ llvm::BitVector Workset(Header.getParent()->getNumBlockIDs(), false);
+ for (const CFGBlock *S : Header.succs())
+ if (S != nullptr)
+ if (auto SID = S->getBlockID(); !Partitioned.test(SID)) {
+ // Successors are assumed to be unique, so we don't test against
+ // `Workset` before adding to `Worklist`.
+ Worklist.push(S);
+ Workset.set(SID);
+ }
+
+ // Successors of interval blocks that could not be added on first encounter
+ // because some predecessor was not (yet) in the interval. Such a block may
+ // be revisited, and added, via another path from the interval. Once the
+ // worklist drains, those still outside become `Interval.Successors`. May
+ // contain duplicates, which is harmless: `Interval.Successors` is a set.
+ std::vector<const CFGBlock *> MaybeSuccessors;
+
+ while (!Worklist.empty()) {
+ const auto *B = Worklist.front();
+ auto ID = B->getBlockID();
+ Worklist.pop();
+ Workset.reset(ID);
+
+ // Check whether all predecessors are in the interval, in which case `B`
+ // is included as well.
+ bool AllInInterval = true;
+ for (const CFGBlock *P : B->preds())
+ if (Interval.Blocks.find(P) == Interval.Blocks.end()) {
+ MaybeSuccessors.push_back(B);
+ AllInInterval = false;
+ break;
+ }
+ if (AllInInterval) {
+ Interval.Blocks.insert(B);
+ Partitioned.set(ID);
+ for (const CFGBlock *S : B->succs())
+ if (S != nullptr)
+ if (auto SID = S->getBlockID();
+ !Partitioned.test(SID) && !Workset.test(SID)) {
+ Worklist.push(S);
+ Workset.set(SID);
+ }
+ }
+ }
+
+ // Candidates that never made it into the interval are its successors.
+ for (const CFGBlock *B : MaybeSuccessors)
+ if (Interval.Blocks.find(B) == Interval.Blocks.end())
+ Interval.Successors.insert(B);
+
+ return Interval;
+}
+// Builds the interval headed by `Header` with no block of `Cfg` pre-assigned.
+CFGInterval buildInterval(const CFG &Cfg, const CFGBlock &Header) {
+  llvm::BitVector Assigned(Cfg.getNumBlockIDs(), false);
+  return buildInterval(Assigned, Header);
+}
+
+std::vector<CFGInterval> partitionIntoIntervals(const CFG &Cfg) {
+  // Bit `N` is set once the block with ID `N` belongs to some interval.
+  llvm::BitVector Assigned(Cfg.getNumBlockIDs(), false);
+  std::queue<const CFGBlock *> Pending;
+
+  // The first interval is the one headed by the entry block.
+  std::vector<CFGInterval> Result;
+  Result.push_back(buildInterval(Assigned, Cfg.getEntry()));
+  for (const CFGBlock *Next : Result.front().Successors)
+    Pending.push(Next);
+
+  while (!Pending.empty()) {
+    const CFGBlock *Candidate = Pending.front();
+    Pending.pop();
+    // Skip candidates absorbed into an interval since they were queued.
+    if (!Assigned.test(Candidate->getBlockID())) {
+      CFGInterval Built = buildInterval(Assigned, *Candidate);
+      for (const CFGBlock *Next : Built.Successors)
+        Pending.push(Next);
+      Result.push_back(std::move(Built));
+    }
+  }
+
+  return Result;
+}
+
+} // namespace clang
diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt
index 809c6ac4e813b..cfea57f53f033 100644
--- a/clang/unittests/Analysis/CMakeLists.txt
+++ b/clang/unittests/Analysis/CMakeLists.txt
@@ -8,6 +8,7 @@ add_clang_unittest(ClangAnalysisTests
CFGTest.cpp
CloneDetectionTest.cpp
ExprMutationAnalyzerTest.cpp
+ IntervalPartitionTest.cpp
MacroExpansionContextTest.cpp
UnsafeBufferUsageTest.cpp
)
diff --git a/clang/unittests/Analysis/IntervalPartitionTest.cpp b/clang/unittests/Analysis/IntervalPartitionTest.cpp
new file mode 100644
index 0000000000000..8ae8f9b7dbc2c
--- /dev/null
+++ b/clang/unittests/Analysis/IntervalPartitionTest.cpp
@@ -0,0 +1,164 @@
+//===- unittests/Analysis/IntervalPartitionTest.cpp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/IntervalPartition.h"
+#include "CFGBuildResult.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace analysis {
+namespace {
+
+TEST(BuildInterval, PartitionSimpleOneInterval) {
+  // Straight-line code: every block is expected to land in one interval.
+  const char *Code = R"(void f() {
+ int x = 3;
+ int y = 7;
+ x = y + x;
+ })";
+  BuildResult Result = BuildCFG(Code);
+  // Abort on failure (as the later tests do): `cfg` is dereferenced below.
+  ASSERT_EQ(BuildResult::BuiltCFG, Result.getStatus());
+  CFG *cfg = Result.getCFG();
+
+  // Basic correctness checks.
+  ASSERT_EQ(cfg->size(), 3u);
+
+  auto &EntryBlock = cfg->getEntry();
+
+  CFGInterval I = buildInterval(*cfg, EntryBlock);
+  EXPECT_EQ(I.Blocks.size(), 3u);
+}
+
+TEST(BuildInterval, PartitionIfThenOneInterval) {
+  // Branching without a loop: every block is expected to land in one interval.
+  const char *Code = R"(void f() {
+ int x = 3;
+ if (x > 3)
+ x = 2;
+ else
+ x = 7;
+ x = x + x;
+ })";
+  BuildResult Result = BuildCFG(Code);
+  // Abort on failure (as the later tests do): `cfg` is dereferenced below.
+  ASSERT_EQ(BuildResult::BuiltCFG, Result.getStatus());
+  CFG *cfg = Result.getCFG();
+
+  // Basic correctness checks.
+  ASSERT_EQ(cfg->size(), 6u);
+
+  auto &EntryBlock = cfg->getEntry();
+
+  CFGInterval I = buildInterval(*cfg, EntryBlock);
+  EXPECT_EQ(I.Blocks.size(), 6u);
+}
+
+using ::testing::UnorderedElementsAre;
+
+TEST(BuildInterval, PartitionWhileMultipleIntervals) {
+  // Expect the entry interval to stop short of the loop head.
+  const char *Code = R"(void f() {
+ int x = 3;
+ while (x >= 3)
+ --x;
+ x = x + x;
+ })";
+  BuildResult Built = BuildCFG(Code);
+  ASSERT_EQ(BuildResult::BuiltCFG, Built.getStatus());
+
+  CFG *Graph = Built.getCFG();
+  ASSERT_EQ(Graph->size(), 7u);
+
+  CFGBlock &Entry = Graph->getEntry();
+  CFGBlock *InitX = *Entry.succ_begin();
+  CFGBlock *LoopHead = *InitX->succ_begin();
+
+  CFGInterval EntryInterval = buildInterval(*Graph, Entry);
+  EXPECT_THAT(EntryInterval.Blocks, UnorderedElementsAre(&Entry, InitX));
+
+  CFGInterval LoopInterval = buildInterval(*Graph, *LoopHead);
+  EXPECT_EQ(LoopInterval.Blocks.size(), 5u);
+}
+
+TEST(PartitionIntoIntervals, PartitionIfThenOneInterval) {
+  const char *Code = R"(void f() {
+ int x = 3;
+ if (x > 3)
+ x = 2;
+ else
+ x = 7;
+ x = x + x;
+ })";
+  BuildResult Built = BuildCFG(Code);
+  ASSERT_EQ(BuildResult::BuiltCFG, Built.getStatus());
+  CFG *Graph = Built.getCFG();
+  ASSERT_EQ(Graph->size(), 6u);
+
+  // Branching without a loop: expect a single interval for the whole CFG.
+  std::vector<CFGInterval> Parts = partitionIntoIntervals(*Graph);
+  EXPECT_EQ(Parts.size(), 1u);
+}
+
+TEST(PartitionIntoIntervals, PartitionWhileTwoIntervals) {
+  const char *Code = R"(void f() {
+ int x = 3;
+ while (x >= 3)
+ --x;
+ x = x + x;
+ })";
+  BuildResult Built = BuildCFG(Code);
+  ASSERT_EQ(BuildResult::BuiltCFG, Built.getStatus());
+  CFG *Graph = Built.getCFG();
+  ASSERT_EQ(Graph->size(), 7u);
+
+  // A single `while` loop: expect the CFG to split into two intervals.
+  std::vector<CFGInterval> Parts = partitionIntoIntervals(*Graph);
+  EXPECT_EQ(Parts.size(), 2u);
+}
+
+TEST(PartitionIntoIntervals, PartitionNestedWhileThreeIntervals) {
+  const char *Code = R"(void f() {
+ int x = 3;
+ while (x >= 3) {
+ --x;
+ int y = x;
+ while (y > 0) --y;
+ }
+ x = x + x;
+ })";
+  BuildResult Built = BuildCFG(Code);
+  ASSERT_EQ(BuildResult::BuiltCFG, Built.getStatus());
+
+  // A loop nested inside another loop: expect three intervals.
+  std::vector<CFGInterval> Parts = partitionIntoIntervals(*Built.getCFG());
+  EXPECT_EQ(Parts.size(), 3u);
+}
+
+TEST(PartitionIntoIntervals, PartitionSequentialWhileThreeIntervals) {
+  const char *Code = R"(void f() {
+ int x = 3;
+ while (x >= 3) {
+ --x;
+ }
+ x = x + x;
+ int y = x;
+ while (y > 0) --y;
+ })";
+  BuildResult Built = BuildCFG(Code);
+  ASSERT_EQ(BuildResult::BuiltCFG, Built.getStatus());
+
+  // Two loops, one after the other: expect three intervals.
+  std::vector<CFGInterval> Parts = partitionIntoIntervals(*Built.getCFG());
+  EXPECT_EQ(Parts.size(), 3u);
+}
+
+} // namespace
+} // namespace analysis
+} // namespace clang
More information about the cfe-commits
mailing list