[clang] [clang][Sema] Stop format size estimator upon %p to adapt to linux kernel's extension (PR #65969)

Takuya Shimizu via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 12 05:14:32 PDT 2023


https://github.com/hazohelet updated https://github.com/llvm/llvm-project/pull/65969:

>From 80d8743441ce1e25f19dbc48bce480b5becfa208 Mon Sep 17 00:00:00 2001
From: Takuya Shimizu <shimizu2486 at gmail.com>
Date: Tue, 12 Sep 2023 00:39:44 +0900
Subject: [PATCH 1/3] [clang][Sema] Stop format size estimator upon %p to adapt
 to linux kernel's extension

GCC stops counting format string's size when it sees %p format in order to avoid `Wformat-truncation` false positive against Linux kernel's format extension (%pOF, for example).
This change makes clang's behavior align with GCC's.
As requested in https://github.com/llvm/llvm-project/issues/64871
---
 clang/include/clang/AST/FormatString.h |  6 ++++++
 clang/lib/AST/PrintfFormatString.cpp   |  8 +++++++-
 clang/lib/Sema/SemaChecking.cpp        | 10 ++++++++++
 clang/test/Sema/warn-fortify-source.c  | 16 +++++++++++++---
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 5c4ad9baaef608c..97ce3cfe0b25c68 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -742,6 +742,12 @@ class FormatStringHandler {
     return true;
   }
 
+  virtual bool
+  ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                              unsigned RemainLen) {
+    return false;
+  }
+
   /// Handle mask types whose sizes are not between one and eight bytes.
   virtual void handleInvalidMaskType(StringRef MaskType) {}
 
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index f0b9d0ecaf23461..750f584d8d37a76 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -429,7 +429,13 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
     // we can recover from?
     if (!FSR.hasValue())
       continue;
-    // We have a format specifier.  Pass it to the callback.
+    // We have a format specifier.
+    // In case the handler needs to abort parsing upon specific specifiers
+    if (H.ShouldStopOnFormatSpecifier(
+            FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) {
+      return false;
+    }
+    // Pass the format specifier to the callback.
     if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
                                  I - FSR.getStart(), Target))
       return true;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3b4ac613da76aa8..1a6ff6cfa19b661 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -860,6 +860,16 @@ class EstimateSizeFormatHandler
       : Size(std::min(Format.find(0), Format.size()) +
              1 /* null byte always written by sprintf */) {}
 
+  bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                                   unsigned RemainLen) override {
+    if (FS.getConversionSpecifier().getKind() ==
+        analyze_printf::PrintfConversionSpecifier::pArg) {
+      assert(Size >= RemainLen && "no underflow");
+      Size -= RemainLen;
+      return true;
+    }
+    return false;
+  }
   bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                              const char *, unsigned SpecifierLen,
                              const TargetInfo &) override {
diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index 5ed9782b26fb788..fba6c718be4a33a 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -85,7 +85,7 @@ void call_memset(void) {
   __builtin_memset(buf, 0xff, 11); // expected-warning {{'memset' will always overflow; destination buffer has size 10, but size argument is 11}}
 }
 
-void call_snprintf(double d) {
+void call_snprintf(double d, int *ptr) {
   char buf[10];
   __builtin_snprintf(buf, 10, "merp");
   __builtin_snprintf(buf, 11, "merp"); // expected-warning {{'snprintf' size argument is too large; destination buffer has size 10, but size argument is 11}}
@@ -96,6 +96,11 @@ void call_snprintf(double d) {
   __builtin_snprintf(buf, 1, "%.1000g", d); // expected-warning {{'snprintf' will always be truncated; specified size is 1, but format string expands to at least 2}}
   __builtin_snprintf(buf, 5, "%.1000g", d);
   __builtin_snprintf(buf, 5, "%.1000G", d);
+  char node_name[6];
+  __builtin_snprintf(node_name, sizeof(node_name), "%pOFn", ptr);
+  __builtin_snprintf(node_name, sizeof(node_name), "12345%pOFn", ptr);
+  __builtin_snprintf(node_name, sizeof(node_name), "123456%pOFn", ptr); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 7}}
+  __builtin_snprintf(node_name, sizeof(node_name), "1234567%pOFn", ptr); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 8}}
 }
 
 void call_vsnprintf(void) {
@@ -110,6 +115,11 @@ void call_vsnprintf(void) {
   __builtin_vsnprintf(buf, 1, "%.1000g", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 1, but format string expands to at least 2}}
   __builtin_vsnprintf(buf, 5, "%.1000g", list);
   __builtin_vsnprintf(buf, 5, "%.1000G", list);
+  char node_name[6];
+  __builtin_snprintf(node_name, sizeof(node_name), "%pOFn", list);
+  __builtin_snprintf(node_name, sizeof(node_name), "12345%pOFn", list);
+  __builtin_snprintf(node_name, sizeof(node_name), "123456%pOFn", list); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 7}}
+  __builtin_snprintf(node_name, sizeof(node_name), "1234567%pOFn", list); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 8}}
 }
 
 void call_sprintf_chk(char *buf) {
@@ -149,7 +159,7 @@ void call_sprintf_chk(char *buf) {
   __builtin___sprintf_chk(buf, 1, 4, "%#x", 9);
   __builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
   __builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9);
-  __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
+  __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9);
   __builtin___sprintf_chk(buf, 1, 3, "%+d", 9);
   __builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}}
   __builtin___sprintf_chk(buf, 1, 3, "% i", 9);
@@ -186,7 +196,7 @@ void call_sprintf(void) {
   sprintf(buf, "12%#x", 9);
   sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
   sprintf(buf, "12%p", (void *)9);
-  sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+  sprintf(buf, "123%p", (void *)9);
   sprintf(buf, "123%+d", 9);
   sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
   sprintf(buf, "123% i", 9);

>From 82d09fabb82aa7eec9e34dffa5d3ff934d0746aa Mon Sep 17 00:00:00 2001
From: Takuya Shimizu <shimizu2486 at gmail.com>
Date: Tue, 12 Sep 2023 17:09:45 +0900
Subject: [PATCH 2/3] Address comments from nickdesaulniers

NFC stylistic changes
---
 clang/lib/AST/PrintfFormatString.cpp |  2 +-
 clang/lib/Sema/SemaChecking.cpp      | 19 ++++++++++++-------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 750f584d8d37a76..bcbbcc3882ab57f 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -430,7 +430,7 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
     if (!FSR.hasValue())
       continue;
     // We have a format specifier.
-    // In case the handler needs to abort parsing upon specific specifiers
+    // In case the handler needs to abort parsing upon specific specifiers.
     if (H.ShouldStopOnFormatSpecifier(
             FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) {
       return false;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1a6ff6cfa19b661..cf48ddd5ae1b896 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -862,13 +862,18 @@ class EstimateSizeFormatHandler
 
   bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
                                    unsigned RemainLen) override {
-    if (FS.getConversionSpecifier().getKind() ==
-        analyze_printf::PrintfConversionSpecifier::pArg) {
-      assert(Size >= RemainLen && "no underflow");
-      Size -= RemainLen;
-      return true;
-    }
-    return false;
+    if (FS.getConversionSpecifier().getKind() !=
+        analyze_printf::PrintfConversionSpecifier::pArg)
+      return false;
+    // Linux Kernel has its own extension for %p format specifier (%pOF, for
+    // example)
+    // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html
+    // If the format specifier is %p, format string size estimator should stop
+    // so as not to emit false-positive against kernel codes.
+    // This behavior aligns with GCC's
+    assert(Size >= RemainLen && "no underflow");
+    Size -= RemainLen;
+    return true;
   }
   bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                              const char *, unsigned SpecifierLen,

>From 84481f55b1cd3c37acc063d36b3515328aa08b20 Mon Sep 17 00:00:00 2001
From: Takuya Shimizu <shimizu2486 at gmail.com>
Date: Tue, 12 Sep 2023 21:13:59 +0900
Subject: [PATCH 3/3] Check subsequent character to say whether or not the
 format is likely to be linux extension

---
 clang/docs/ReleaseNotes.rst            |  4 ++
 clang/include/clang/AST/FormatString.h |  3 +-
 clang/lib/AST/PrintfFormatString.cpp   |  4 +-
 clang/lib/Sema/SemaChecking.cpp        | 62 ++++++++++++++++++++++----
 clang/test/Sema/warn-fortify-source.c  |  4 +-
 5 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index cc8b2c3808933cb..b0d544c39d73f1a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -163,6 +163,10 @@ Improvements to Clang's diagnostics
 - Clang's ``-Wfortify-source`` now diagnoses ``snprintf`` call that is known to
   result in string truncation.
   (`#64871: <https://github.com/llvm/llvm-project/issues/64871>`_).
+  Clang stops calculating the minimum length of the output format string upon %p
+  if the succeeding character can be a part of the format specifier in linux's
+  format extension in order to avoid false positive warnings against the linux
+  code base.
   Also clang no longer emits false positive warnings about the output length of
   ``%g`` format specifier.
 - Clang now emits ``-Wcast-qual`` for functional-style cast expressions.
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 97ce3cfe0b25c68..81bbeb00b0766d8 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -744,7 +744,8 @@ class FormatStringHandler {
 
   virtual bool
   ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
-                              unsigned RemainLen) {
+                              const char *StartSpecifier, const char *I,
+                              const char *E) {
     return false;
   }
 
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index bcbbcc3882ab57f..2297135a96e8976 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -431,10 +431,8 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
       continue;
     // We have a format specifier.
     // In case the handler needs to abort parsing upon specific specifiers.
-    if (H.ShouldStopOnFormatSpecifier(
-            FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) {
+    if (H.ShouldStopOnFormatSpecifier(FSR.getValue(), FSR.getStart(), I, E))
       return false;
-    }
     // Pass the format specifier to the callback.
     if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
                                  I - FSR.getStart(), Target))
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cf48ddd5ae1b896..9ac200497ccc181 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -851,6 +851,50 @@ class ScanfDiagnosticFormatHandler
   }
 };
 
+/// `I` points to the next character of `%p` format.
+/// This functon checks if the subsequent character can be linux kernel's
+/// extnded format specifier
+static inline constexpr bool canBeLinuxFormatExtension(const char *I,
+                                                       const char *E) {
+  assert(I < E && "format string not yet exhausted");
+  // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html
+  switch (*I) {
+  default:
+    return false;
+  case 'S':
+  case 's':
+  case 'B':
+  case 'R':
+  case 'r':
+  case 'h':
+  case 'b':
+  case 'M':
+  case 'm':
+  case 'I':
+  case 'i':
+  case 'E':
+  case 'U':
+  case 'V':
+  case 'K':
+  case 'N':
+  case '4':
+  case 'a':
+  case 'd':
+  case 't':
+  case 'C':
+  case 'D':
+  case 'g':
+  case 'G':
+  case 'O':
+  case 'f':
+  case 'x':
+  case 'e':
+  case 'u':
+  case 'k':
+    return true;
+  }
+}
+
 class EstimateSizeFormatHandler
     : public analyze_format_string::FormatStringHandler {
   size_t Size;
@@ -861,16 +905,18 @@ class EstimateSizeFormatHandler
              1 /* null byte always written by sprintf */) {}
 
   bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
-                                   unsigned RemainLen) override {
-    if (FS.getConversionSpecifier().getKind() !=
-        analyze_printf::PrintfConversionSpecifier::pArg)
-      return false;
+                                   const char *StartSpecifier, const char *I,
+                                   const char *E) override {
     // Linux Kernel has its own extension for %p format specifier (%pOF, for
     // example)
-    // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html
-    // If the format specifier is %p, format string size estimator should stop
-    // so as not to emit false-positive against kernel codes.
-    // This behavior aligns with GCC's
+    // If the format is likely to be linux kernel extension, we should stop
+    // the format size estimation so as not to emit false positive in linux
+    // codes.
+    if (FS.getConversionSpecifier().getKind() !=
+            analyze_printf::PrintfConversionSpecifier::pArg ||
+        I == E || !canBeLinuxFormatExtension(I, E))
+      return false;
+    unsigned RemainLen = static_cast<unsigned>(E - StartSpecifier);
     assert(Size >= RemainLen && "no underflow");
     Size -= RemainLen;
     return true;
diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index fba6c718be4a33a..6ad507861562f96 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -159,7 +159,7 @@ void call_sprintf_chk(char *buf) {
   __builtin___sprintf_chk(buf, 1, 4, "%#x", 9);
   __builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
   __builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9);
-  __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9);
+  __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
   __builtin___sprintf_chk(buf, 1, 3, "%+d", 9);
   __builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}}
   __builtin___sprintf_chk(buf, 1, 3, "% i", 9);
@@ -196,7 +196,7 @@ void call_sprintf(void) {
   sprintf(buf, "12%#x", 9);
   sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
   sprintf(buf, "12%p", (void *)9);
-  sprintf(buf, "123%p", (void *)9);
+  sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
   sprintf(buf, "123%+d", 9);
   sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
   sprintf(buf, "123% i", 9);



More information about the cfe-commits mailing list