[clang] [lldb] [llvm] [mlir] [Support] Validate number of arguments passed to formatv() (PR #105745)
Mehdi Amini via cfe-commits
cfe-commits at lists.llvm.org
Tue Aug 27 13:06:21 PDT 2024
================
@@ -67,92 +68,123 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
StringRef Options;
size_t Index = 0;
RepString = RepString.trim();
- if (RepString.consumeInteger(0, Index)) {
- assert(false && "Invalid replacement sequence index!");
- return ReplacementItem{};
- }
+ if (RepString.consumeInteger(0, Index))
+ return "Invalid replacement sequence index!";
RepString = RepString.trim();
if (RepString.consume_front(",")) {
if (!consumeFieldLayout(RepString, Where, Align, Pad))
- assert(false && "Invalid replacement field layout specification!");
+ return "Invalid replacement field layout specification!";
}
RepString = RepString.trim();
if (RepString.consume_front(":")) {
Options = RepString.trim();
RepString = StringRef();
}
RepString = RepString.trim();
- if (!RepString.empty()) {
- assert(false && "Unexpected characters found in replacement string!");
- }
-
+ if (!RepString.empty())
+ return "Unexpected character found in replacement string!";
return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
}
-std::pair<ReplacementItem, StringRef>
-formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
- while (!Fmt.empty()) {
- // Everything up until the first brace is a literal.
- if (Fmt.front() != '{') {
- std::size_t BO = Fmt.find_first_of('{');
- return std::make_pair(ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO));
- }
-
- StringRef Braces = Fmt.take_while([](char C) { return C == '{'; });
- // If there is more than one brace, then some of them are escaped. Treat
- // these as replacements.
- if (Braces.size() > 1) {
- size_t NumEscapedBraces = Braces.size() / 2;
- StringRef Middle = Fmt.take_front(NumEscapedBraces);
- StringRef Right = Fmt.drop_front(NumEscapedBraces * 2);
- return std::make_pair(ReplacementItem{Middle}, Right);
- }
- // An unterminated open brace is undefined. Assert to indicate that this is
- // undefined and that we consider it an error. When asserts are disabled,
- // build a replacement item with an error message.
- std::size_t BC = Fmt.find_first_of('}');
- if (BC == StringRef::npos) {
- assert(
- false &&
- "Unterminated brace sequence. Escape with {{ for a literal brace.");
- return std::make_pair(
- ReplacementItem{"Unterminated brace sequence. Escape with {{ for a "
- "literal brace."},
- StringRef());
- }
-
- // Even if there is a closing brace, if there is another open brace before
- // this closing brace, treat this portion as literal, and try again with the
- // next one.
- std::size_t BO2 = Fmt.find_first_of('{', 1);
- if (BO2 < BC)
- return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)},
- Fmt.substr(BO2));
-
- StringRef Spec = Fmt.slice(1, BC);
- StringRef Right = Fmt.substr(BC + 1);
-
- auto RI = parseReplacementItem(Spec);
- if (RI)
- return std::make_pair(*RI, Right);
+static std::variant<std::pair<ReplacementItem, StringRef>, StringRef>
+splitLiteralAndReplacement(StringRef Fmt) {
+ // Everything up until the first brace is a literal.
+ if (Fmt.front() != '{') {
+ std::size_t BO = Fmt.find_first_of('{');
+ return std::make_pair(ReplacementItem(Fmt.substr(0, BO)), Fmt.substr(BO));
+ }
- // If there was an error parsing the replacement item, treat it as an
- // invalid replacement spec, and just continue.
- Fmt = Fmt.drop_front(BC + 1);
+ StringRef Braces = Fmt.take_while([](char C) { return C == '{'; });
+ // If there is more than one brace, then some of them are escaped. Treat
+ // these as replacements.
+ if (Braces.size() > 1) {
+ size_t NumEscapedBraces = Braces.size() / 2;
+ StringRef Middle = Fmt.take_front(NumEscapedBraces);
+ StringRef Right = Fmt.drop_front(NumEscapedBraces * 2);
+ return std::make_pair(ReplacementItem(Middle), Right);
}
- return std::make_pair(ReplacementItem{Fmt}, StringRef());
+ // An unterminated open brace is undefined. Assert to indicate that this is
+ // undefined and that we consider it an error. When asserts are disabled,
+ // build a replacement item with an error message.
+ std::size_t BC = Fmt.find_first_of('}');
+ if (BC == StringRef::npos)
+ return "Unterminated brace sequence. Escape with {{ for a literal brace.";
+
+ // Even if there is a closing brace, if there is another open brace before
+ // this closing brace, treat this portion as literal, and try again with the
+ // next one.
+ std::size_t BO2 = Fmt.find_first_of('{', 1);
+ if (BO2 < BC)
+ return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)}, Fmt.substr(BO2));
+
+ StringRef Spec = Fmt.slice(1, BC);
+ StringRef Right = Fmt.substr(BC + 1);
+
+ auto RI = parseReplacementItem(Spec);
+ if (const StringRef *ErrMsg = std::get_if<1>(&RI))
+ return *ErrMsg;
+
+ return std::make_pair(std::get<0>(RI), Right);
}
-SmallVector<ReplacementItem, 2>
-formatv_object_base::parseFormatString(StringRef Fmt) {
+std::pair<SmallVector<ReplacementItem, 2>, bool>
+formatv_object_base::parseFormatString(raw_ostream &S, const StringRef Fmt,
+ size_t NumArgs, bool Validate) {
SmallVector<ReplacementItem, 2> Replacements;
ReplacementItem I;
- while (!Fmt.empty()) {
- std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
+ size_t NumExpectedArgs = 0;
+
+ // Make a copy for pasring as it updates it.
+ StringRef ParseFmt = Fmt;
+ while (!ParseFmt.empty()) {
+ auto RI = splitLiteralAndReplacement(ParseFmt);
+ if (const StringRef *ErrMsg = std::get_if<1>(&RI)) {
+ // If there was an error parsing the format string, write the error to the
+ // stream, and return false as second member of the pair.
+ errs() << "Invalid format string: " << Fmt << "\n";
+ assert(0 && "Invalid format string");
+ S << *ErrMsg;
+ return {{}, false};
+ }
+ std::tie(I, ParseFmt) = std::get<0>(RI);
if (I.Type != ReplacementType::Empty)
Replacements.push_back(I);
+ if (I.Type == ReplacementType::Format)
+ NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1);
}
- return Replacements;
+ if (!Validate)
+ return {Replacements, true};
+
+ // Perform additional validation. Verify that the number of arguments matches
+ // the number of replacement indices and that there are no holes in the
+ // replacement indexes.
+ if (NumExpectedArgs != NumArgs) {
+ errs() << "Expected " << NumExpectedArgs << " Args, but got " << NumArgs
+ << " for format string '" << Fmt << "'\n";
+ assert(0 && "Invalid formatv() call");
+ S << "Expected " << NumExpectedArgs << " Args, but got " << NumArgs
+ << " for format string '" << Fmt << "'\n";
+ return {{}, false};
+ }
+
+ SmallSet<size_t, 2> Indices;
----------------
joker-eph wrote:
`SmallSetVector` is worse than just a `SmallSet`, so no :)
What I had in mind would be:
```
SmallVector<bool> Indices;
Indices.resize(Replacements.size());
int count = 0;
for (const ReplacementItem &R : Replacements) {
if (R.Type != ReplacementType::Format)
continue;
if (!Indices[R.Index]) {
Indices[R.Index] = true;
count++;
}
}
}
```
And you have the result in the `count` variable.
https://github.com/llvm/llvm-project/pull/105745
More information about the cfe-commits
mailing list