[Mlir-commits] [clang] [lldb] [llvm] [mlir] [Support] Validate number of arguments passed to formatv() (PR #105745)

Mehdi Amini llvmlistbot at llvm.org
Tue Aug 27 13:06:21 PDT 2024


================
@@ -67,92 +68,123 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
   StringRef Options;
   size_t Index = 0;
   RepString = RepString.trim();
-  if (RepString.consumeInteger(0, Index)) {
-    assert(false && "Invalid replacement sequence index!");
-    return ReplacementItem{};
-  }
+  if (RepString.consumeInteger(0, Index))
+    return "Invalid replacement sequence index!";
   RepString = RepString.trim();
   if (RepString.consume_front(",")) {
     if (!consumeFieldLayout(RepString, Where, Align, Pad))
-      assert(false && "Invalid replacement field layout specification!");
+      return "Invalid replacement field layout specification!";
   }
   RepString = RepString.trim();
   if (RepString.consume_front(":")) {
     Options = RepString.trim();
     RepString = StringRef();
   }
   RepString = RepString.trim();
-  if (!RepString.empty()) {
-    assert(false && "Unexpected characters found in replacement string!");
-  }
-
+  if (!RepString.empty())
+    return "Unexpected character found in replacement string!";
   return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
 }
 
-std::pair<ReplacementItem, StringRef>
-formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
-  while (!Fmt.empty()) {
-    // Everything up until the first brace is a literal.
-    if (Fmt.front() != '{') {
-      std::size_t BO = Fmt.find_first_of('{');
-      return std::make_pair(ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO));
-    }
-
-    StringRef Braces = Fmt.take_while([](char C) { return C == '{'; });
-    // If there is more than one brace, then some of them are escaped.  Treat
-    // these as replacements.
-    if (Braces.size() > 1) {
-      size_t NumEscapedBraces = Braces.size() / 2;
-      StringRef Middle = Fmt.take_front(NumEscapedBraces);
-      StringRef Right = Fmt.drop_front(NumEscapedBraces * 2);
-      return std::make_pair(ReplacementItem{Middle}, Right);
-    }
-    // An unterminated open brace is undefined. Assert to indicate that this is
-    // undefined and that we consider it an error. When asserts are disabled,
-    // build a replacement item with an error message.
-    std::size_t BC = Fmt.find_first_of('}');
-    if (BC == StringRef::npos) {
-      assert(
-          false &&
-          "Unterminated brace sequence. Escape with {{ for a literal brace.");
-      return std::make_pair(
-          ReplacementItem{"Unterminated brace sequence. Escape with {{ for a "
-                          "literal brace."},
-          StringRef());
-    }
-
-    // Even if there is a closing brace, if there is another open brace before
-    // this closing brace, treat this portion as literal, and try again with the
-    // next one.
-    std::size_t BO2 = Fmt.find_first_of('{', 1);
-    if (BO2 < BC)
-      return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)},
-                            Fmt.substr(BO2));
-
-    StringRef Spec = Fmt.slice(1, BC);
-    StringRef Right = Fmt.substr(BC + 1);
-
-    auto RI = parseReplacementItem(Spec);
-    if (RI)
-      return std::make_pair(*RI, Right);
+static std::variant<std::pair<ReplacementItem, StringRef>, StringRef>
+splitLiteralAndReplacement(StringRef Fmt) {
+  // Everything up until the first brace is a literal.
+  if (Fmt.front() != '{') {
+    std::size_t BO = Fmt.find_first_of('{');
+    return std::make_pair(ReplacementItem(Fmt.substr(0, BO)), Fmt.substr(BO));
+  }
 
-    // If there was an error parsing the replacement item, treat it as an
-    // invalid replacement spec, and just continue.
-    Fmt = Fmt.drop_front(BC + 1);
+  StringRef Braces = Fmt.take_while([](char C) { return C == '{'; });
+  // If there is more than one brace, then some of them are escaped.  Treat
+  // these as replacements.
+  if (Braces.size() > 1) {
+    size_t NumEscapedBraces = Braces.size() / 2;
+    StringRef Middle = Fmt.take_front(NumEscapedBraces);
+    StringRef Right = Fmt.drop_front(NumEscapedBraces * 2);
+    return std::make_pair(ReplacementItem(Middle), Right);
   }
-  return std::make_pair(ReplacementItem{Fmt}, StringRef());
+  // An unterminated open brace is undefined. Assert to indicate that this is
+  // undefined and that we consider it an error. When asserts are disabled,
+  // build a replacement item with an error message.
+  std::size_t BC = Fmt.find_first_of('}');
+  if (BC == StringRef::npos)
+    return "Unterminated brace sequence. Escape with {{ for a literal brace.";
+
+  // Even if there is a closing brace, if there is another open brace before
+  // this closing brace, treat this portion as literal, and try again with the
+  // next one.
+  std::size_t BO2 = Fmt.find_first_of('{', 1);
+  if (BO2 < BC)
+    return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)}, Fmt.substr(BO2));
+
+  StringRef Spec = Fmt.slice(1, BC);
+  StringRef Right = Fmt.substr(BC + 1);
+
+  auto RI = parseReplacementItem(Spec);
+  if (const StringRef *ErrMsg = std::get_if<1>(&RI))
+    return *ErrMsg;
+
+  return std::make_pair(std::get<0>(RI), Right);
 }
 
-SmallVector<ReplacementItem, 2>
-formatv_object_base::parseFormatString(StringRef Fmt) {
+std::pair<SmallVector<ReplacementItem, 2>, bool>
+formatv_object_base::parseFormatString(raw_ostream &S, const StringRef Fmt,
+                                       size_t NumArgs, bool Validate) {
   SmallVector<ReplacementItem, 2> Replacements;
   ReplacementItem I;
-  while (!Fmt.empty()) {
-    std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
+  size_t NumExpectedArgs = 0;
+
+  // Make a copy for pasring as it updates it.
+  StringRef ParseFmt = Fmt;
+  while (!ParseFmt.empty()) {
+    auto RI = splitLiteralAndReplacement(ParseFmt);
+    if (const StringRef *ErrMsg = std::get_if<1>(&RI)) {
+      // If there was an error parsing the format string, write the error to the
+      // stream, and return false as second member of the pair.
+      errs() << "Invalid format string: " << Fmt << "\n";
+      assert(0 && "Invalid format string");
+      S << *ErrMsg;
+      return {{}, false};
+    }
+    std::tie(I, ParseFmt) = std::get<0>(RI);
     if (I.Type != ReplacementType::Empty)
       Replacements.push_back(I);
+    if (I.Type == ReplacementType::Format)
+      NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1);
   }
-  return Replacements;
+  if (!Validate)
+    return {Replacements, true};
+
+  // Perform additional validation. Verify that the number of arguments matches
+  // the number of replacement indices and that there are no holes in the
+  // replacement indexes.
+  if (NumExpectedArgs != NumArgs) {
+    errs() << "Expected " << NumExpectedArgs << " Args, but got " << NumArgs
+           << " for format string '" << Fmt << "'\n";
+    assert(0 && "Invalid formatv() call");
+    S << "Expected " << NumExpectedArgs << " Args, but got " << NumArgs
+      << " for format string '" << Fmt << "'\n";
+    return {{}, false};
+  }
+
+  SmallSet<size_t, 2> Indices;
----------------
joker-eph wrote:

`SmallSetVector` is worse than just a `SmallSet`, so no :)

What I had in mind would be:

```
  SmallVector<bool> Indices;
  Indices.resize(Replacements.size());
  int count = 0;
  for (const ReplacementItem &R : Replacements) {
     if (R.Type != ReplacementType::Format)
       continue;
     if (!Indices[R.Index]) {
       Indices[R.Index] = true;
       count++;
     }
   }
}
```

And you have the result in the `count` variable.

https://github.com/llvm/llvm-project/pull/105745


More information about the Mlir-commits mailing list