[clang] [clang] Diagnose problematic diagnostic messages (PR #93229)
Erich Keane via cfe-commits
cfe-commits at lists.llvm.org
Thu May 23 12:32:57 PDT 2024
================
@@ -1213,6 +1213,194 @@ static bool isRemark(const Record &Diag) {
return ClsName == "CLASS_REMARK";
}
+// Presumes the text has been split at the first whitespace or hyphen.
+static bool isExemptAtStart(StringRef Text) {
+ // Fast path, the first character is lowercase or not alphanumeric.
+ if (isLower(Text[0]) || !isAlnum(Text[0]))
+ return true;
+
+ // If the text is all uppercase (or numbers, +, or _), then we assume it's an
+ // acronym and that's allowed. This covers cases like ISO, C23, C++14, and
+ // OBJECT_MODE. However, if there's only a single letter other than "C", we
+ // do not exempt it so that we catch a case like "A really bad idea" while
+ // still allowing a case like "C does not allow...".
+ if (llvm::all_of(Text, [](char C) {
+ return isUpper(C) || isDigit(C) || C == '+' || C == '_';
+ }))
+ return Text.size() > 1 || Text[0] == 'C';
+
+ // Otherwise, there are a few other exemptions.
+ return StringSwitch<bool>(Text)
+ .Case("AddressSanitizer", true)
+ .Case("CFString", true)
+ .Case("Clang", true)
+ .Case("Fuchsia", true)
+ .Case("GNUstep", true)
+ .Case("IBOutletCollection", true)
+ .Case("Microsoft", true)
+ .Case("Neon", true)
+ .StartsWith("NSInvocation", true) // NSInvocation, NSInvocation's
+ .Case("Objective", true) // Objective-C, Objective-C++
+ .Case("OpenACC", true)
+ .Case("OpenCL", true)
+ .Case("OpenMP", true)
+ .Case("Pascal", true)
+ .Case("Swift", true)
+ .Case("Unicode", true)
+ .Case("Vulkan", true)
+ .Case("WebAssembly", true)
+ .Default(false);
+}
+
+// Does not presume the text has been split at all.
+static bool isExemptAtEnd(StringRef Text) {
+ // Rather than come up with a list of characters that are allowed, we go the
+ // other way and look only for characters that are not allowed.
+ switch (Text.back()) {
+ default:
+ return true;
+ case '?':
+ // Explicitly allowed to support "; did you mean?".
+ return true;
+ case '.':
+ case '!':
+ return false;
+ }
+}
+
+static void verifyDiagnosticWording(const Record &Diag) {
+ StringRef FullDiagText = Diag.getValueAsString("Summary");
+
+ auto DiagnoseStart = [&](StringRef Text) {
+ // Verify that the text does not start with a capital letter, except for
+ // special cases that are exempt like ISO and C++. Find the first word
+ // by looking for a word breaking character.
+ char Separators[] = {' ', '-', ',', '}'};
+ auto Iter = std::find_first_of(
+ Text.begin(), Text.end(), std::begin(Separators), std::end(Separators));
+
+ StringRef First = Text.substr(0, Iter - Text.begin());
+ if (!isExemptAtStart(First)) {
+ PrintError(&Diag,
+ "Diagnostics should not start with a capital letter; '" +
+ First + "' is invalid");
+ }
+ };
+
+ auto DiagnoseEnd = [&](StringRef Text) {
+ // Verify that the text does not end with punctuation like '.' or '!'.
+ if (!isExemptAtEnd(Text)) {
+ PrintError(&Diag, "Diagnostics should not end with punctuation; '" +
+ Text.substr(Text.size() - 1, 1) + "' is invalid");
+ }
+ };
+
+ // If the diagnostic starts with %select, look through it to see whether any
+ // of the options will cause a problem.
+ if (FullDiagText.starts_with("%select{")) {
+ // Do a balanced delimiter scan from the start of the text to find the
+ // closing '}', skipping intermediary {} pairs.
+
+ size_t BraceCount = 1;
+ auto Iter = FullDiagText.begin() + /*%select{*/ 8;
+ for (auto End = FullDiagText.end(); Iter != End; ++Iter) {
+ char Ch = *Iter;
+ if (Ch == '{')
+ ++BraceCount;
+ else if (Ch == '}')
+ --BraceCount;
+ if (!BraceCount)
+ break;
+ }
+ // Defending against a malformed diagnostic string.
+ if (BraceCount != 0)
+ return;
+
+ StringRef SelectText = FullDiagText.substr(
+ /*%select{*/ 8, Iter - FullDiagText.begin() - /*%select{*/ 8);
----------------
erichkeane wrote:
instead of magic numbers, can we use 'sizeof' for %select{?
https://github.com/llvm/llvm-project/pull/93229
More information about the cfe-commits
mailing list