[clang] [clang] Use a Worklist for some CodeGenFunctions (PR #115395)
Stephen Verderame via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 2 17:39:02 PST 2024
https://github.com/stephenverderame updated https://github.com/llvm/llvm-project/pull/115395
>From 71702aca23fcc8e2d104d483a40ae16a4a30e48d Mon Sep 17 00:00:00 2001
From: Stephen Verderame <s_verderame at apple.com>
Date: Mon, 21 Oct 2024 10:01:43 -0700
Subject: [PATCH] [clang] Use worklist for some CodeGenFunctions
This converts some CodeGenFunctions that are called when emitting switch
statements to be iterative. Recursive implementations of functions
used to determine when certain cases can be omitted can cause a stack
overflow when attempting to generate IR for deeply nested branches or
switch cases.
---
clang/lib/CodeGen/CGStmt.cpp | 5 +-
clang/lib/CodeGen/CodeGenFunction.cpp | 110 +++++++++++++---------
clang/test/CodeGen/switch-case-overflow.c | 29 ++++++
3 files changed, 95 insertions(+), 49 deletions(-)
create mode 100644 clang/test/CodeGen/switch-case-overflow.c
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 41dc91c578c800..431ba53d059e7e 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1879,7 +1879,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
// If this is the switchcase (case 4: or default) that we're looking for, then
// we're in business. Just add the substatement.
- if (const SwitchCase *SC = dyn_cast<SwitchCase>(S)) {
+ while (const SwitchCase *SC = dyn_cast<SwitchCase>(S)) {
if (S == Case) {
FoundCase = true;
return CollectStatementsForCase(SC->getSubStmt(), nullptr, FoundCase,
@@ -1887,8 +1887,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
}
// Otherwise, this is some other case or default statement, just ignore it.
- return CollectStatementsForCase(SC->getSubStmt(), Case, FoundCase,
- ResultStmts);
+ S = SC->getSubStmt();
}
// If we are in the live part of the code and we found our break statement,
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 6ead45793742d6..8b6d7c6b5746c3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1619,31 +1619,38 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
/// this statement is not executed normally, it not containing a label means
/// that we can just remove the code.
bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
- // Null statement, not a label!
- if (!S) return false;
+ llvm::SmallVector<std::pair<const Stmt *, bool>, 32> WorkList;
+ WorkList.emplace_back(S, IgnoreCaseStmts);
- // If this is a label, we have to emit the code, consider something like:
- // if (0) { ... foo: bar(); } goto foo;
- //
- // TODO: If anyone cared, we could track __label__'s, since we know that you
- // can't jump to one from outside their declared region.
- if (isa<LabelStmt>(S))
- return true;
+ while (!WorkList.empty()) {
+ auto [CurStmt, CurIgnoreCaseStmts] = WorkList.pop_back_val();
- // If this is a case/default statement, and we haven't seen a switch, we have
- // to emit the code.
- if (isa<SwitchCase>(S) && !IgnoreCaseStmts)
- return true;
+ // Null statement, not a label!
+ if (!CurStmt)
+ continue;
- // If this is a switch statement, we want to ignore cases below it.
- if (isa<SwitchStmt>(S))
- IgnoreCaseStmts = true;
+ // If this is a label, we have to emit the code, consider something like:
+ // if (0) { ... foo: bar(); } goto foo;
+ //
+ // TODO: If anyone cared, we could track __label__'s, since we know that you
+ // can't jump to one from outside their declared region.
+ if (isa<LabelStmt>(CurStmt))
+ return true;
- // Scan subexpressions for verboten labels.
- for (const Stmt *SubStmt : S->children())
- if (ContainsLabel(SubStmt, IgnoreCaseStmts))
+ // If this is a case/default statement, and we haven't seen a switch, we
+ // have to emit the code.
+ if (isa<SwitchCase>(CurStmt) && !CurIgnoreCaseStmts)
return true;
+ // If this is a switch statement, we want to ignore cases below it.
+ if (isa<SwitchStmt>(CurStmt))
+ CurIgnoreCaseStmts = true;
+
+ // Scan subexpressions for verboten labels.
+ for (const Stmt *SubStmt : CurStmt->children())
+ WorkList.emplace_back(SubStmt, CurIgnoreCaseStmts);
+ }
+
return false;
}
@@ -1651,46 +1658,57 @@ bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
/// If the statement (recursively) contains a switch or loop with a break
/// inside of it, this is fine.
bool CodeGenFunction::containsBreak(const Stmt *S) {
- // Null statement, not a label!
- if (!S) return false;
+ llvm::SmallVector<const Stmt *, 32> WorkList;
+ WorkList.push_back(S);
+ while (!WorkList.empty()) {
+ const Stmt *CurStmt = WorkList.pop_back_val();
- // If this is a switch or loop that defines its own break scope, then we can
- // include it and anything inside of it.
- if (isa<SwitchStmt>(S) || isa<WhileStmt>(S) || isa<DoStmt>(S) ||
- isa<ForStmt>(S))
- return false;
+ // Null statement, not a label!
+ if (!CurStmt)
+ continue;
- if (isa<BreakStmt>(S))
- return true;
+ // If this is a switch or loop that defines its own break scope, then we can
+ // include it and anything inside of it.
+ if (isa<SwitchStmt>(CurStmt) || isa<WhileStmt>(CurStmt) ||
+ isa<DoStmt>(CurStmt) || isa<ForStmt>(CurStmt))
+ continue;
- // Scan subexpressions for verboten breaks.
- for (const Stmt *SubStmt : S->children())
- if (containsBreak(SubStmt))
+ if (isa<BreakStmt>(CurStmt))
return true;
+ // Scan subexpressions for verboten breaks.
+ for (const Stmt *SubStmt : CurStmt->children())
+ WorkList.push_back(SubStmt);
+ }
return false;
}
bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) {
- if (!S) return false;
-
- // Some statement kinds add a scope and thus never add a decl to the current
- // scope. Note, this list is longer than the list of statements that might
- // have an unscoped decl nested within them, but this way is conservatively
- // correct even if more statement kinds are added.
- if (isa<IfStmt>(S) || isa<SwitchStmt>(S) || isa<WhileStmt>(S) ||
- isa<DoStmt>(S) || isa<ForStmt>(S) || isa<CompoundStmt>(S) ||
- isa<CXXForRangeStmt>(S) || isa<CXXTryStmt>(S) ||
- isa<ObjCForCollectionStmt>(S) || isa<ObjCAtTryStmt>(S))
- return false;
+ llvm::SmallVector<const Stmt *, 32> WorkList;
+ WorkList.push_back(S);
+ while (!WorkList.empty()) {
+ const Stmt *CurStmt = WorkList.pop_back_val();
- if (isa<DeclStmt>(S))
- return true;
+ if (!CurStmt)
+ continue;
- for (const Stmt *SubStmt : S->children())
- if (mightAddDeclToScope(SubStmt))
+ // Some statement kinds add a scope and thus never add a decl to the current
+ // scope. Note, this list is longer than the list of statements that might
+ // have an unscoped decl nested within them, but this way is conservatively
+ // correct even if more statement kinds are added.
+ if (isa<IfStmt>(CurStmt) || isa<SwitchStmt>(CurStmt) ||
+ isa<WhileStmt>(CurStmt) || isa<DoStmt>(CurStmt) ||
+ isa<ForStmt>(CurStmt) || isa<CompoundStmt>(CurStmt) ||
+ isa<CXXForRangeStmt>(CurStmt) || isa<CXXTryStmt>(CurStmt) ||
+ isa<ObjCForCollectionStmt>(CurStmt) || isa<ObjCAtTryStmt>(CurStmt))
+ continue;
+
+ if (isa<DeclStmt>(CurStmt))
return true;
+ for (const Stmt *SubStmt : CurStmt->children())
+ WorkList.push_back(SubStmt);
+ }
return false;
}
diff --git a/clang/test/CodeGen/switch-case-overflow.c b/clang/test/CodeGen/switch-case-overflow.c
new file mode 100644
index 00000000000000..f548d645e322b9
--- /dev/null
+++ b/clang/test/CodeGen/switch-case-overflow.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
+
+#define CASES(n) case (n): case (n + 1): case (n + 2): case (n + 3):
+#define CASES16(n) CASES(n) CASES(n + 4) CASES(n + 8) CASES(n + 12)
+#define CASES64(n) CASES16(n) CASES16(n + 16) CASES16(n + 32) CASES16(n + 48)
+#define CASES256(n) CASES64(n) CASES64(n + 64) CASES64(n + 128) CASES64(n + 192)
+#define CASES1024(n) CASES256(n) CASES256(n + 256) CASES256(n + 512) CASES256(n + 768)
+#define CASES4192(n) CASES1024(n) CASES1024(n + 1024) CASES1024(n + 2048) CASES1024(n + 3072)
+#define CASES16768(n) CASES4192(n) CASES4192(n + 4192) CASES4192(n + 8384) CASES4192(n + 12576)
+#define CASES_STARTING_AT(n) CASES16768(n) CASES16768(n + 16768) CASES16768(n + 33536) CASES16768(n + 50304)
+
+// Check this doesn't cause the compiler to crash
+void foo() {
+ // CHECK-LABEL: @foo
+ // CHECK-NOT: switch{{ }}
+ // CHECK-NOT: br{{ }}
+
+ // 37 does not match a switch case
+ switch (37) {
+ CASES_STARTING_AT(100)
+ break;
+ }
+
+ // 2000 matches a switch case
+ switch(2000) {
+ CASES_STARTING_AT(0)
+ break;
+ }
+}
More information about the cfe-commits
mailing list