[clang] [clang][Sema] Stop format size estimator upon %p to adapt to linux kernel's extension (PR #65969)
Takuya Shimizu via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 12 05:14:32 PDT 2023
https://github.com/hazohelet updated https://github.com/llvm/llvm-project/pull/65969:
>From 80d8743441ce1e25f19dbc48bce480b5becfa208 Mon Sep 17 00:00:00 2001
From: Takuya Shimizu <shimizu2486 at gmail.com>
Date: Tue, 12 Sep 2023 00:39:44 +0900
Subject: [PATCH 1/3] [clang][Sema] Stop format size estimator upon %p to adapt
to linux kernel's extension
GCC stops counting format string's size when it sees %p format in order to avoid `Wformat-truncation` false positive against Linux kernel's format extension (%pOF, for example).
This change makes clang's behavior align with GCC's.
As requested in https://github.com/llvm/llvm-project/issues/64871
---
clang/include/clang/AST/FormatString.h | 6 ++++++
clang/lib/AST/PrintfFormatString.cpp | 8 +++++++-
clang/lib/Sema/SemaChecking.cpp | 10 ++++++++++
clang/test/Sema/warn-fortify-source.c | 16 +++++++++++++---
4 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 5c4ad9baaef608c..97ce3cfe0b25c68 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -742,6 +742,12 @@ class FormatStringHandler {
return true;
}
+ virtual bool
+ ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ unsigned RemainLen) {
+ return false;
+ }
+
/// Handle mask types whose sizes are not between one and eight bytes.
virtual void handleInvalidMaskType(StringRef MaskType) {}
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index f0b9d0ecaf23461..750f584d8d37a76 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -429,7 +429,13 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
// we can recover from?
if (!FSR.hasValue())
continue;
- // We have a format specifier. Pass it to the callback.
+ // We have a format specifier.
+ // In case the handler needs to abort parsing upon specific specifiers
+ if (H.ShouldStopOnFormatSpecifier(
+ FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) {
+ return false;
+ }
+ // Pass the format specifier to the callback.
if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
I - FSR.getStart(), Target))
return true;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3b4ac613da76aa8..1a6ff6cfa19b661 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -860,6 +860,16 @@ class EstimateSizeFormatHandler
: Size(std::min(Format.find(0), Format.size()) +
1 /* null byte always written by sprintf */) {}
+ bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ unsigned RemainLen) override {
+ if (FS.getConversionSpecifier().getKind() ==
+ analyze_printf::PrintfConversionSpecifier::pArg) {
+ assert(Size >= RemainLen && "no underflow");
+ Size -= RemainLen;
+ return true;
+ }
+ return false;
+ }
bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
const char *, unsigned SpecifierLen,
const TargetInfo &) override {
diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index 5ed9782b26fb788..fba6c718be4a33a 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -85,7 +85,7 @@ void call_memset(void) {
__builtin_memset(buf, 0xff, 11); // expected-warning {{'memset' will always overflow; destination buffer has size 10, but size argument is 11}}
}
-void call_snprintf(double d) {
+void call_snprintf(double d, int *ptr) {
char buf[10];
__builtin_snprintf(buf, 10, "merp");
__builtin_snprintf(buf, 11, "merp"); // expected-warning {{'snprintf' size argument is too large; destination buffer has size 10, but size argument is 11}}
@@ -96,6 +96,11 @@ void call_snprintf(double d) {
__builtin_snprintf(buf, 1, "%.1000g", d); // expected-warning {{'snprintf' will always be truncated; specified size is 1, but format string expands to at least 2}}
__builtin_snprintf(buf, 5, "%.1000g", d);
__builtin_snprintf(buf, 5, "%.1000G", d);
+ char node_name[6];
+ __builtin_snprintf(node_name, sizeof(node_name), "%pOFn", ptr);
+ __builtin_snprintf(node_name, sizeof(node_name), "12345%pOFn", ptr);
+ __builtin_snprintf(node_name, sizeof(node_name), "123456%pOFn", ptr); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 7}}
+ __builtin_snprintf(node_name, sizeof(node_name), "1234567%pOFn", ptr); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 8}}
}
void call_vsnprintf(void) {
@@ -110,6 +115,11 @@ void call_vsnprintf(void) {
__builtin_vsnprintf(buf, 1, "%.1000g", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 1, but format string expands to at least 2}}
__builtin_vsnprintf(buf, 5, "%.1000g", list);
__builtin_vsnprintf(buf, 5, "%.1000G", list);
+ char node_name[6];
+ __builtin_snprintf(node_name, sizeof(node_name), "%pOFn", list);
+ __builtin_snprintf(node_name, sizeof(node_name), "12345%pOFn", list);
+ __builtin_snprintf(node_name, sizeof(node_name), "123456%pOFn", list); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 7}}
+ __builtin_snprintf(node_name, sizeof(node_name), "1234567%pOFn", list); // expected-warning {{'snprintf' will always be truncated; specified size is 6, but format string expands to at least 8}}
}
void call_sprintf_chk(char *buf) {
@@ -149,7 +159,7 @@ void call_sprintf_chk(char *buf) {
__builtin___sprintf_chk(buf, 1, 4, "%#x", 9);
__builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
__builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9);
- __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
+ __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9);
__builtin___sprintf_chk(buf, 1, 3, "%+d", 9);
__builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}}
__builtin___sprintf_chk(buf, 1, 3, "% i", 9);
@@ -186,7 +196,7 @@ void call_sprintf(void) {
sprintf(buf, "12%#x", 9);
sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
sprintf(buf, "12%p", (void *)9);
- sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "123%p", (void *)9);
sprintf(buf, "123%+d", 9);
sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
sprintf(buf, "123% i", 9);
>From 82d09fabb82aa7eec9e34dffa5d3ff934d0746aa Mon Sep 17 00:00:00 2001
From: Takuya Shimizu <shimizu2486 at gmail.com>
Date: Tue, 12 Sep 2023 17:09:45 +0900
Subject: [PATCH 2/3] Address comments from nickdesaulniers
NFC stylistic changes
---
clang/lib/AST/PrintfFormatString.cpp | 2 +-
clang/lib/Sema/SemaChecking.cpp | 19 ++++++++++++-------
2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 750f584d8d37a76..bcbbcc3882ab57f 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -430,7 +430,7 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
if (!FSR.hasValue())
continue;
// We have a format specifier.
- // In case the handler needs to abort parsing upon specific specifiers
+ // In case the handler needs to abort parsing upon specific specifiers.
if (H.ShouldStopOnFormatSpecifier(
FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) {
return false;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1a6ff6cfa19b661..cf48ddd5ae1b896 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -862,13 +862,18 @@ class EstimateSizeFormatHandler
bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
unsigned RemainLen) override {
- if (FS.getConversionSpecifier().getKind() ==
- analyze_printf::PrintfConversionSpecifier::pArg) {
- assert(Size >= RemainLen && "no underflow");
- Size -= RemainLen;
- return true;
- }
- return false;
+ if (FS.getConversionSpecifier().getKind() !=
+ analyze_printf::PrintfConversionSpecifier::pArg)
+ return false;
+ // Linux Kernel has its own extension for %p format specifier (%pOF, for
+ // example)
+ // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html
+ // If the format specifier is %p, format string size estimator should stop
+ // so as not to emit false-positive against kernel codes.
+ // This behavior aligns with GCC's
+ assert(Size >= RemainLen && "no underflow");
+ Size -= RemainLen;
+ return true;
}
bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
const char *, unsigned SpecifierLen,
>From 84481f55b1cd3c37acc063d36b3515328aa08b20 Mon Sep 17 00:00:00 2001
From: Takuya Shimizu <shimizu2486 at gmail.com>
Date: Tue, 12 Sep 2023 21:13:59 +0900
Subject: [PATCH 3/3] Check subsequent character to say whether or not the
format is likely to be linux extension
---
clang/docs/ReleaseNotes.rst | 4 ++
clang/include/clang/AST/FormatString.h | 3 +-
clang/lib/AST/PrintfFormatString.cpp | 4 +-
clang/lib/Sema/SemaChecking.cpp | 62 ++++++++++++++++++++++----
clang/test/Sema/warn-fortify-source.c | 4 +-
5 files changed, 63 insertions(+), 14 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index cc8b2c3808933cb..b0d544c39d73f1a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -163,6 +163,10 @@ Improvements to Clang's diagnostics
- Clang's ``-Wfortify-source`` now diagnoses ``snprintf`` call that is known to
result in string truncation.
(`#64871: <https://github.com/llvm/llvm-project/issues/64871>`_).
+ Clang stops calculating the minimum length of the output format string upon %p
+ if the succeeding character can be a part of the format specifier in linux's
+ format extension in order to avoid false positive warnings against the linux
+ code base.
Also clang no longer emits false positive warnings about the output length of
``%g`` format specifier.
- Clang now emits ``-Wcast-qual`` for functional-style cast expressions.
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 97ce3cfe0b25c68..81bbeb00b0766d8 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -744,7 +744,8 @@ class FormatStringHandler {
virtual bool
ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
- unsigned RemainLen) {
+ const char *StartSpecifier, const char *I,
+ const char *E) {
return false;
}
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index bcbbcc3882ab57f..2297135a96e8976 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -431,10 +431,8 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
continue;
// We have a format specifier.
// In case the handler needs to abort parsing upon specific specifiers.
- if (H.ShouldStopOnFormatSpecifier(
- FSR.getValue(), static_cast<unsigned>(E - FSR.getStart()))) {
+ if (H.ShouldStopOnFormatSpecifier(FSR.getValue(), FSR.getStart(), I, E))
return false;
- }
// Pass the format specifier to the callback.
if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
I - FSR.getStart(), Target))
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cf48ddd5ae1b896..9ac200497ccc181 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -851,6 +851,50 @@ class ScanfDiagnosticFormatHandler
}
};
+/// `I` points to the next character of `%p` format.
+/// This functon checks if the subsequent character can be linux kernel's
+/// extnded format specifier
+static inline constexpr bool canBeLinuxFormatExtension(const char *I,
+ const char *E) {
+ assert(I < E && "format string not yet exhausted");
+ // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html
+ switch (*I) {
+ default:
+ return false;
+ case 'S':
+ case 's':
+ case 'B':
+ case 'R':
+ case 'r':
+ case 'h':
+ case 'b':
+ case 'M':
+ case 'm':
+ case 'I':
+ case 'i':
+ case 'E':
+ case 'U':
+ case 'V':
+ case 'K':
+ case 'N':
+ case '4':
+ case 'a':
+ case 'd':
+ case 't':
+ case 'C':
+ case 'D':
+ case 'g':
+ case 'G':
+ case 'O':
+ case 'f':
+ case 'x':
+ case 'e':
+ case 'u':
+ case 'k':
+ return true;
+ }
+}
+
class EstimateSizeFormatHandler
: public analyze_format_string::FormatStringHandler {
size_t Size;
@@ -861,16 +905,18 @@ class EstimateSizeFormatHandler
1 /* null byte always written by sprintf */) {}
bool ShouldStopOnFormatSpecifier(const analyze_printf::PrintfSpecifier &FS,
- unsigned RemainLen) override {
- if (FS.getConversionSpecifier().getKind() !=
- analyze_printf::PrintfConversionSpecifier::pArg)
- return false;
+ const char *StartSpecifier, const char *I,
+ const char *E) override {
// Linux Kernel has its own extension for %p format specifier (%pOF, for
// example)
- // Kernel Document: https://docs.kernel.org/core-api/printk-formats.html
- // If the format specifier is %p, format string size estimator should stop
- // so as not to emit false-positive against kernel codes.
- // This behavior aligns with GCC's
+ // If the format is likely to be linux kernel extension, we should stop
+ // the format size estimation so as not to emit false positive in linux
+ // codes.
+ if (FS.getConversionSpecifier().getKind() !=
+ analyze_printf::PrintfConversionSpecifier::pArg ||
+ I == E || !canBeLinuxFormatExtension(I, E))
+ return false;
+ unsigned RemainLen = static_cast<unsigned>(E - StartSpecifier);
assert(Size >= RemainLen && "no underflow");
Size -= RemainLen;
return true;
diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index fba6c718be4a33a..6ad507861562f96 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -159,7 +159,7 @@ void call_sprintf_chk(char *buf) {
__builtin___sprintf_chk(buf, 1, 4, "%#x", 9);
__builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
__builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9);
- __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9);
+ __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
__builtin___sprintf_chk(buf, 1, 3, "%+d", 9);
__builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}}
__builtin___sprintf_chk(buf, 1, 3, "% i", 9);
@@ -196,7 +196,7 @@ void call_sprintf(void) {
sprintf(buf, "12%#x", 9);
sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
sprintf(buf, "12%p", (void *)9);
- sprintf(buf, "123%p", (void *)9);
+ sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
sprintf(buf, "123%+d", 9);
sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
sprintf(buf, "123% i", 9);
More information about the cfe-commits
mailing list