[llvm] [FileCheck] Improve printing variables with escapes (PR #145865)

Mészáros Gergely via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 26 22:31:57 PDT 2025


https://github.com/Maetveis updated https://github.com/llvm/llvm-project/pull/145865

>From cf8748ac3b5b05776e52332b99eda682ca7e8f68 Mon Sep 17 00:00:00 2001
From: Gergely Meszaros <gergely.meszaros at intel.com>
Date: Thu, 26 Jun 2025 02:54:21 -0700
Subject: [PATCH 1/6] [FileCheck] Improve printing variables with escapes

Firstly fix FileCheck printing string variables
double-escaped (first regex, then C-style).

This is confusing because it is not clear if the printed
value is the literal value or exactly how it is escaped, without
looking at FileCheck's source code.

Secondly, only escape when doing so makes it easier to read the value
(when the string contains tabs, newlines or non-printable characters).
When the variable value is escaped, make a note of it in the output too,
in order to avoid confusion.

The common case that is motivating this change is variables that contain
windows style paths with backslashes. These were printed as
`"C:\\\\Program Files\\\\MyApp\\\\file.txt"`.
Now prefer to print them as `"C:\Program Files\MyApp\file.txt"`.
Printing the value literally also makes it easier to search for
variables in the output, since the user can just copy-paste it.
---
 llvm/lib/FileCheck/FileCheck.cpp   | 53 ++++++++++++++++++++++++++----
 llvm/lib/FileCheck/FileCheckImpl.h | 21 +++++++++---
 llvm/test/FileCheck/var-escape.txt |  6 ++--
 3 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index bcca499322aee..5ddb7420820dc 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -264,7 +264,7 @@ BinaryOperation::getImplicitFormat(const SourceMgr &SM) const {
                                                          : *RightFormat;
 }
 
-Expected<std::string> NumericSubstitution::getResult() const {
+Expected<std::string> NumericSubstitution::getResultRegex() const {
   assert(ExpressionPointer->getAST() != nullptr &&
          "Substituting empty expression");
   Expected<APInt> EvaluatedValue = ExpressionPointer->getAST()->eval();
@@ -274,7 +274,17 @@ Expected<std::string> NumericSubstitution::getResult() const {
   return Format.getMatchingString(*EvaluatedValue);
 }
 
-Expected<std::string> StringSubstitution::getResult() const {
+Expected<std::string> NumericSubstitution::getResultForDiagnostics() const {
+  // The "regex" returned by getResultRegex() is just a numeric value
+  // like '42', '0x2A', '-17', 'DEADBEEF' etc. This is already suitable for use in diagnostics.
+  Expected<std::string> Literal = getResultRegex();
+  if (!Literal)
+    return Literal;
+
+  return "\"" + std::move(*Literal) + "\"";
+}
+
+Expected<std::string> StringSubstitution::getResultRegex() const {
   // Look up the value and escape it so that we can put it into the regex.
   Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
   if (!VarVal)
@@ -282,6 +292,37 @@ Expected<std::string> StringSubstitution::getResult() const {
   return Regex::escape(*VarVal);
 }
 
+Expected<std::string> StringSubstitution::getResultForDiagnostics() const {
+    Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
+    if (!VarVal)
+      return VarVal.takeError();
+
+    std::string Result;
+    Result.reserve(VarVal->size() + 2);
+    raw_string_ostream OS(Result);
+
+    OS << '"';
+    // Escape the string if it contains any characters that
+    // make it hard to read, such as tabs, newlines, quotes, and non-printable characters.
+    // Note that we do not include backslashes in this set, because they are
+    // common in Windows paths and escaping them would make the output
+    // harder to read.
+    // However, when we do escape, backslashes are escaped as well,
+    // otherwise the output would be ambiguous.
+    const bool NeedsEscaping = llvm::any_of(*VarVal, [](char C) {
+      return C == '\t' || C == '\n' || C == '"' || !isPrint(C);
+    });
+    if (NeedsEscaping)
+      OS.write_escaped(*VarVal);
+    else
+      OS << *VarVal;
+    OS << '"';
+    if (NeedsEscaping)
+      OS << " (escaped value)";
+    
+    return Result;
+}
+
 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); }
 
 Expected<Pattern::VariableProperties>
@@ -1106,7 +1147,7 @@ Pattern::MatchResult Pattern::match(StringRef Buffer,
     Error Errs = Error::success();
     for (const auto &Substitution : Substitutions) {
       // Substitute and check for failure (e.g. use of undefined variable).
-      Expected<std::string> Value = Substitution->getResult();
+      Expected<std::string> Value = Substitution->getResultRegex();
       if (!Value) {
         // Convert to an ErrorDiagnostic to get location information. This is
         // done here rather than printMatch/printNoMatch since now we know which
@@ -1210,7 +1251,7 @@ void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
       SmallString<256> Msg;
       raw_svector_ostream OS(Msg);
 
-      Expected<std::string> MatchedValue = Substitution->getResult();
+      Expected<std::string> MatchedValue = Substitution->getResultForDiagnostics();
       // Substitution failures are handled in printNoMatch().
       if (!MatchedValue) {
         consumeError(MatchedValue.takeError());
@@ -1218,8 +1259,8 @@ void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
       }
 
       OS << "with \"";
-      OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
-      OS.write_escaped(*MatchedValue) << "\"";
+      OS.write_escaped(Substitution->getFromString()) << "\" equal to ";
+      OS << *MatchedValue;
 
       // We report only the start of the match/search range to suggest we are
       // reporting the substitutions as set at the start of the match/search.
diff --git a/llvm/lib/FileCheck/FileCheckImpl.h b/llvm/lib/FileCheck/FileCheckImpl.h
index b3cd2af5d57e3..176668a3e5154 100644
--- a/llvm/lib/FileCheck/FileCheckImpl.h
+++ b/llvm/lib/FileCheck/FileCheckImpl.h
@@ -366,9 +366,14 @@ class Substitution {
   /// \returns the index where the substitution is to be performed in RegExStr.
   size_t getIndex() const { return InsertIdx; }
 
-  /// \returns a string containing the result of the substitution represented
+  /// \returns a regular expression string that matches the result of the substitution represented
   /// by this class instance or an error if substitution failed.
-  virtual Expected<std::string> getResult() const = 0;
+  virtual Expected<std::string> getResultRegex() const = 0;
+
+  /// \returns a string containing the result of the substitution represented
+  /// by this class instance in a form suitable for diagnostics, or an error if
+  /// substitution failed.
+  virtual Expected<std::string> getResultForDiagnostics() const = 0;
 };
 
 class StringSubstitution : public Substitution {
@@ -379,7 +384,11 @@ class StringSubstitution : public Substitution {
 
   /// \returns the text that the string variable in this substitution matched
   /// when defined, or an error if the variable is undefined.
-  Expected<std::string> getResult() const override;
+  Expected<std::string> getResultRegex() const override;
+
+  /// \returns the text that the string variable in this substitution matched
+  /// when defined, in a form suitable for diagnostics, or an error if the variable is undefined.
+  Expected<std::string> getResultForDiagnostics() const override;
 };
 
 class NumericSubstitution : public Substitution {
@@ -397,7 +406,11 @@ class NumericSubstitution : public Substitution {
 
   /// \returns a string containing the result of evaluating the expression in
   /// this substitution, or an error if evaluation failed.
-  Expected<std::string> getResult() const override;
+  Expected<std::string> getResultRegex() const override;
+
+  /// \returns a string containing the result of evaluating the expression in
+  /// this substitution, in a form suitable for diagnostics, or an error if evaluation failed.
+  Expected<std::string> getResultForDiagnostics() const override;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/FileCheck/var-escape.txt b/llvm/test/FileCheck/var-escape.txt
index c3dc0fc7591d7..b290b6671be3d 100644
--- a/llvm/test/FileCheck/var-escape.txt
+++ b/llvm/test/FileCheck/var-escape.txt
@@ -14,9 +14,9 @@ VARS-NEXT: [[WINPATH]] [[NOT_ESCAPED]] [[ESCAPED]] [[#$NUMERIC + 0]]
 ; RUN:   -dump-input=never --strict-whitespace --check-prefix=VARS --input-file=%t.1 %s 2>&1 \
 ; RUN: | FileCheck %s
 
-CHECK: with "WINPATH" equal to "A:\\\\windows\\\\style\\\\path"
-CHECK: with "NOT_ESCAPED" equal to "shouldn't be escaped \\[a-Z\\]\\\\\\+\\$"
-CHECK: with "ESCAPED" equal to "\\\\ \014\013 needs\to \"be\" escaped\\\000"
+CHECK: with "WINPATH" equal to "A:\windows\style\path"
+CHECK: with "NOT_ESCAPED" equal to "shouldn't be escaped [a-Z]\+$"
+CHECK: with "ESCAPED" equal to "\\ \014\013 needs\to \"be\" escaped\000" (escaped value)
 CHECK: with "$NUMERIC + 0" equal to "DEADBEEF"
 
 ; Test escaping of the name of a numeric substitution, which might contain

>From 845ed6897885a4cb3f758fb1a52a9a2b82a0c434 Mon Sep 17 00:00:00 2001
From: Gergely Meszaros <gergely.meszaros at intel.com>
Date: Thu, 26 Jun 2025 03:24:20 -0700
Subject: [PATCH 2/6] Improve comment wording

---
 llvm/lib/FileCheck/FileCheck.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index 5ddb7420820dc..57ed87d19f5f3 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -304,7 +304,8 @@ Expected<std::string> StringSubstitution::getResultForDiagnostics() const {
     OS << '"';
     // Escape the string if it contains any characters that
     // make it hard to read, such as tabs, newlines, quotes, and non-printable characters.
-    // Note that we do not include backslashes in this set, because they are
+    // These are the characters that are escaped by write_escaped(),
+    // except we do not include backslashes, because they are
     // common in Windows paths and escaping them would make the output
     // harder to read.
     // However, when we do escape, backslashes are escaped as well,

>From b3ec140316fe450e30ca33b573bf8c48a36bbe46 Mon Sep 17 00:00:00 2001
From: Gergely Meszaros <gergely.meszaros at intel.com>
Date: Thu, 26 Jun 2025 03:27:32 -0700
Subject: [PATCH 3/6] Apply clang-format

---
 llvm/lib/FileCheck/FileCheck.cpp   | 64 +++++++++++++++---------------
 llvm/lib/FileCheck/FileCheckImpl.h |  3 +-
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index 57ed87d19f5f3..4a141355de969 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -276,7 +276,8 @@ Expected<std::string> NumericSubstitution::getResultRegex() const {
 
 Expected<std::string> NumericSubstitution::getResultForDiagnostics() const {
   // The "regex" returned by getResultRegex() is just a numeric value
-  // like '42', '0x2A', '-17', 'DEADBEEF' etc. This is already suitable for use in diagnostics.
+  // like '42', '0x2A', '-17', 'DEADBEEF' etc. This is already suitable for use
+  // in diagnostics.
   Expected<std::string> Literal = getResultRegex();
   if (!Literal)
     return Literal;
@@ -293,35 +294,35 @@ Expected<std::string> StringSubstitution::getResultRegex() const {
 }
 
 Expected<std::string> StringSubstitution::getResultForDiagnostics() const {
-    Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
-    if (!VarVal)
-      return VarVal.takeError();
-
-    std::string Result;
-    Result.reserve(VarVal->size() + 2);
-    raw_string_ostream OS(Result);
-
-    OS << '"';
-    // Escape the string if it contains any characters that
-    // make it hard to read, such as tabs, newlines, quotes, and non-printable characters.
-    // These are the characters that are escaped by write_escaped(),
-    // except we do not include backslashes, because they are
-    // common in Windows paths and escaping them would make the output
-    // harder to read.
-    // However, when we do escape, backslashes are escaped as well,
-    // otherwise the output would be ambiguous.
-    const bool NeedsEscaping = llvm::any_of(*VarVal, [](char C) {
-      return C == '\t' || C == '\n' || C == '"' || !isPrint(C);
-    });
-    if (NeedsEscaping)
-      OS.write_escaped(*VarVal);
-    else
-      OS << *VarVal;
-    OS << '"';
-    if (NeedsEscaping)
-      OS << " (escaped value)";
-    
-    return Result;
+  Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
+  if (!VarVal)
+    return VarVal.takeError();
+
+  std::string Result;
+  Result.reserve(VarVal->size() + 2);
+  raw_string_ostream OS(Result);
+
+  OS << '"';
+  // Escape the string if it contains any characters that
+  // make it hard to read, such as tabs, newlines, quotes, and non-printable
+  // characters. These are the characters that are escaped by write_escaped(),
+  // except we do not include backslashes, because they are
+  // common in Windows paths and escaping them would make the output
+  // harder to read.
+  // However, when we do escape, backslashes are escaped as well,
+  // otherwise the output would be ambiguous.
+  const bool NeedsEscaping = llvm::any_of(*VarVal, [](char C) {
+    return C == '\t' || C == '\n' || C == '"' || !isPrint(C);
+  });
+  if (NeedsEscaping)
+    OS.write_escaped(*VarVal);
+  else
+    OS << *VarVal;
+  OS << '"';
+  if (NeedsEscaping)
+    OS << " (escaped value)";
+
+  return Result;
 }
 
 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); }
@@ -1252,7 +1253,8 @@ void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
       SmallString<256> Msg;
       raw_svector_ostream OS(Msg);
 
-      Expected<std::string> MatchedValue = Substitution->getResultForDiagnostics();
+      Expected<std::string> MatchedValue =
+          Substitution->getResultForDiagnostics();
       // Substitution failures are handled in printNoMatch().
       if (!MatchedValue) {
         consumeError(MatchedValue.takeError());
diff --git a/llvm/lib/FileCheck/FileCheckImpl.h b/llvm/lib/FileCheck/FileCheckImpl.h
index 176668a3e5154..556b1f7d68522 100644
--- a/llvm/lib/FileCheck/FileCheckImpl.h
+++ b/llvm/lib/FileCheck/FileCheckImpl.h
@@ -387,7 +387,8 @@ class StringSubstitution : public Substitution {
   Expected<std::string> getResultRegex() const override;
 
   /// \returns the text that the string variable in this substitution matched
-  /// when defined, in a form suitable for diagnostics, or an error if the variable is undefined.
+  /// when defined, in a form suitable for diagnostics, or an error if the
+  /// variable is undefined.
   Expected<std::string> getResultForDiagnostics() const override;
 };
 

>From 3b90e547f283ef00c335fe7a991a9ca46f478c80 Mon Sep 17 00:00:00 2001
From: Gergely Meszaros <gergely.meszaros at intel.com>
Date: Thu, 26 Jun 2025 07:12:09 -0700
Subject: [PATCH 4/6] Simplify NeedsEscaping

---
 llvm/lib/FileCheck/FileCheck.cpp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index 4a141355de969..b79f6ec5b4f04 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -304,16 +304,14 @@ Expected<std::string> StringSubstitution::getResultForDiagnostics() const {
 
   OS << '"';
   // Escape the string if it contains any characters that
-  // make it hard to read, such as tabs, newlines, quotes, and non-printable
-  // characters. These are the characters that are escaped by write_escaped(),
-  // except we do not include backslashes, because they are
-  // common in Windows paths and escaping them would make the output
-  // harder to read.
-  // However, when we do escape, backslashes are escaped as well,
-  // otherwise the output would be ambiguous.
-  const bool NeedsEscaping = llvm::any_of(*VarVal, [](char C) {
-    return C == '\t' || C == '\n' || C == '"' || !isPrint(C);
-  });
+  // make it hard to read, such as non-printable characters (including all
+  // whitespace except space) and double quotes. These are the characters that
+  // are escaped by write_escaped(), except we do not include backslashes,
+  // because they are common in Windows paths and escaping them would make the
+  // output harder to read. However, when we do escape, backslashes are escaped
+  // as well, otherwise the output would be ambiguous.
+  const bool NeedsEscaping =
+      llvm::any_of(*VarVal, [](char C) { return !isPrint(C) || C == '"'; });
   if (NeedsEscaping)
     OS.write_escaped(*VarVal);
   else

>From 937b192f9659ff6e5781744b7d9c8cb0f2a284f8 Mon Sep 17 00:00:00 2001
From: Gergely Meszaros <gergely.meszaros at intel.com>
Date: Thu, 26 Jun 2025 07:51:45 -0700
Subject: [PATCH 5/6] Even more clang-format

---
 llvm/lib/FileCheck/FileCheckImpl.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/FileCheck/FileCheckImpl.h b/llvm/lib/FileCheck/FileCheckImpl.h
index 556b1f7d68522..5cf5548cdfde1 100644
--- a/llvm/lib/FileCheck/FileCheckImpl.h
+++ b/llvm/lib/FileCheck/FileCheckImpl.h
@@ -366,8 +366,9 @@ class Substitution {
   /// \returns the index where the substitution is to be performed in RegExStr.
   size_t getIndex() const { return InsertIdx; }
 
-  /// \returns a regular expression string that matches the result of the substitution represented
-  /// by this class instance or an error if substitution failed.
+  /// \returns a regular expression string that matches the result of the
+  /// substitution represented by this class instance or an error if
+  /// substitution failed.
   virtual Expected<std::string> getResultRegex() const = 0;
 
   /// \returns a string containing the result of the substitution represented
@@ -410,7 +411,8 @@ class NumericSubstitution : public Substitution {
   Expected<std::string> getResultRegex() const override;
 
   /// \returns a string containing the result of evaluating the expression in
-  /// this substitution, in a form suitable for diagnostics, or an error if evaluation failed.
+  /// this substitution, in a form suitable for diagnostics, or an error if
+  /// evaluation failed.
   Expected<std::string> getResultForDiagnostics() const override;
 };
 

>From 1a47b85062a17bc82e9b648c4c7454d6ff48bd1a Mon Sep 17 00:00:00 2001
From: Gergely Meszaros <gergely.meszaros at intel.com>
Date: Thu, 26 Jun 2025 22:31:21 -0700
Subject: [PATCH 6/6] Rename function in unit test too

---
 llvm/unittests/FileCheck/FileCheckTest.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/unittests/FileCheck/FileCheckTest.cpp b/llvm/unittests/FileCheck/FileCheckTest.cpp
index 91fce69b9a6c4..2c4130330e970 100644
--- a/llvm/unittests/FileCheck/FileCheckTest.cpp
+++ b/llvm/unittests/FileCheck/FileCheckTest.cpp
@@ -1439,7 +1439,7 @@ TEST_F(FileCheckTest, Substitution) {
   // Substitution of an undefined string variable fails and error holds that
   // variable's name.
   StringSubstitution StringSubstitution(&Context, "VAR404", 42);
-  Expected<std::string> SubstValue = StringSubstitution.getResult();
+  Expected<std::string> SubstValue = StringSubstitution.getResultRegex();
   expectUndefErrors({"VAR404"}, SubstValue.takeError());
 
   // Numeric substitution blocks constituted of defined numeric variables are
@@ -1452,20 +1452,20 @@ TEST_F(FileCheckTest, Substitution) {
       std::move(NVarUse), ExpressionFormat(ExpressionFormat::Kind::HexUpper));
   NumericSubstitution SubstitutionN(&Context, "N", std::move(ExpressionN),
                                     /*InsertIdx=*/30);
-  SubstValue = SubstitutionN.getResult();
+  SubstValue = SubstitutionN.getResultRegex();
   ASSERT_THAT_EXPECTED(SubstValue, Succeeded());
   EXPECT_EQ("A", *SubstValue);
 
   // Substitution of an undefined numeric variable fails, error holds name of
   // undefined variable.
   NVar.clearValue();
-  SubstValue = SubstitutionN.getResult();
+  SubstValue = SubstitutionN.getResultRegex();
   expectUndefErrors({"N"}, SubstValue.takeError());
 
   // Substitution of a defined string variable returns the right value.
   Pattern P(Check::CheckPlain, &Context, 1);
   StringSubstitution = llvm::StringSubstitution(&Context, "FOO", 42);
-  SubstValue = StringSubstitution.getResult();
+  SubstValue = StringSubstitution.getResultRegex();
   ASSERT_THAT_EXPECTED(SubstValue, Succeeded());
   EXPECT_EQ("BAR", *SubstValue);
 }



More information about the llvm-commits mailing list