[clang] 0c9c9dd - [clang][Sema] Add truncation warning on fortified snprintf

Takuya Shimizu via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 25 22:41:39 PDT 2023


Author: Takuya Shimizu
Date: 2023-08-26T14:41:05+09:00
New Revision: 0c9c9dd9a24f9d715d950fef0ac7aae01437af96

URL: https://github.com/llvm/llvm-project/commit/0c9c9dd9a24f9d715d950fef0ac7aae01437af96
DIFF: https://github.com/llvm/llvm-project/commit/0c9c9dd9a24f9d715d950fef0ac7aae01437af96.diff

LOG: [clang][Sema] Add truncation warning on fortified snprintf

This patch warns on snprintf calls whose n argument is known to be smaller than the size of the formatted string like

```
char buf[5];
snprintf(buf, 5, "Hello");
```
This is a counterpart of gcc's Wformat-truncation
Fixes https://github.com/llvm/llvm-project/issues/64871

Reviewed By: aaron.ballman, nickdesaulniers
Differential Revision: https://reviews.llvm.org/D158562

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/Sema/SemaChecking.cpp
    clang/test/Analysis/taint-generic.c
    clang/test/Sema/format-strings.c
    clang/test/Sema/warn-fortify-source.c

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 0fff49c2c5108a..1ba4215e603349 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -150,6 +150,11 @@ Improvements to Clang's diagnostics
 - Clang constexpr evaluator now diagnoses compound assignment operators against
   uninitialized variables as a read of uninitialized object.
   (`#51536 <https://github.com/llvm/llvm-project/issues/51536>_`)
+- Clang's ``-Wfortify-source`` now diagnoses ``snprintf`` call that is known to
+  result in string truncation.
+  (`#64871: <https://github.com/llvm/llvm-project/issues/64871>`_).
+  Also clang no longer emits false positive warnings about the output length of
+  ``%g`` format specifier.
 
 Bug Fixes in This Version
 -------------------------

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index cf6f422ee47167..7f0cfb29cddeac 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -857,6 +857,11 @@ def warn_fortify_source_format_overflow : Warning<
   " but format string expands to at least %2">,
   InGroup<FortifySource>;
 
+def warn_fortify_source_format_truncation: Warning<
+  "'%0' will always be truncated; specified size is %1,"
+  " but format string expands to at least %2">,
+  InGroup<FortifySource>;
+
 def warn_fortify_scanf_overflow : Warning<
   "'%0' may overflow; destination buffer in argument %1 has size "
   "%2, but the corresponding specifier may require size %3">,

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e3b4d151536528..c78a6b9c510767 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -888,9 +888,12 @@ class EstimateSizeFormatHandler
       break;
 
     // %g style conversion switches between %f or %e style dynamically.
-    // %f always takes less space, so default to it.
+    // %g removes trailing zeros, and does not print decimal point if there are
+    // no digits that follow it. Thus %g can print a single digit.
     case analyze_format_string::ConversionSpecifier::gArg:
     case analyze_format_string::ConversionSpecifier::GArg:
+      Size += 1;
+      break;
 
     // Floating point number in the form '[+]ddd.ddd'.
     case analyze_format_string::ConversionSpecifier::fArg:
@@ -1032,6 +1035,23 @@ class EstimateSizeFormatHandler
 
 } // namespace
 
+static bool ProcessFormatStringLiteral(const Expr *FormatExpr,
+                                       StringRef &FormatStrRef, size_t &StrLen,
+                                       ASTContext &Context) {
+  if (const auto *Format = dyn_cast<StringLiteral>(FormatExpr);
+      Format && (Format->isOrdinary() || Format->isUTF8())) {
+    FormatStrRef = Format->getString();
+    const ConstantArrayType *T =
+        Context.getAsConstantArrayType(Format->getType());
+    assert(T && "String literal not of constant array type!");
+    size_t TypeSize = T->getSize().getZExtValue();
+    // In case there's a null byte somewhere.
+    StrLen = std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
+    return true;
+  }
+  return false;
+}
+
 void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
                                                CallExpr *TheCall) {
   if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -1183,11 +1203,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     const auto *FormatExpr =
         TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
 
-    const auto *Format = dyn_cast<StringLiteral>(FormatExpr);
-    if (!Format)
-      return;
-
-    if (!Format->isOrdinary() && !Format->isUTF8())
+    StringRef FormatStrRef;
+    size_t StrLen;
+    if (!ProcessFormatStringLiteral(FormatExpr, FormatStrRef, StrLen, Context))
       return;
 
     auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize,
@@ -1200,21 +1218,11 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
                                         << DestSize << SourceSize);
     };
 
-    StringRef FormatStrRef = Format->getString();
     auto ShiftedComputeSizeArgument = [&](unsigned Index) {
       return ComputeSizeArgument(Index + DataIndex);
     };
     ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose);
     const char *FormatBytes = FormatStrRef.data();
-    const ConstantArrayType *T =
-        Context.getAsConstantArrayType(Format->getType());
-    assert(T && "String literal not of constant array type!");
-    size_t TypeSize = T->getSize().getZExtValue();
-
-    // In case there's a null byte somewhere.
-    size_t StrLen =
-        std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
-
     analyze_format_string::ParseScanfString(H, FormatBytes,
                                             FormatBytes + StrLen, getLangOpts(),
                                             Context.getTargetInfo());
@@ -1230,22 +1238,11 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
     auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
 
-    if (auto *Format = dyn_cast<StringLiteral>(FormatExpr)) {
-
-      if (!Format->isOrdinary() && !Format->isUTF8())
-        return;
-
-      StringRef FormatStrRef = Format->getString();
+    StringRef FormatStrRef;
+    size_t StrLen;
+    if (ProcessFormatStringLiteral(FormatExpr, FormatStrRef, StrLen, Context)) {
       EstimateSizeFormatHandler H(FormatStrRef);
       const char *FormatBytes = FormatStrRef.data();
-      const ConstantArrayType *T =
-          Context.getAsConstantArrayType(Format->getType());
-      assert(T && "String literal not of constant array type!");
-      size_t TypeSize = T->getSize().getZExtValue();
-
-      // In case there's a null byte somewhere.
-      size_t StrLen =
-          std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
       if (!analyze_format_string::ParsePrintfString(
               H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
               Context.getTargetInfo(), false)) {
@@ -1326,8 +1323,28 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
   case Builtin::BI__builtin_vsnprintf: {
     DiagID = diag::warn_fortify_source_size_mismatch;
     SourceSize = ComputeExplicitObjectSizeArgument(1);
+    const auto *FormatExpr = TheCall->getArg(2)->IgnoreParenImpCasts();
+    StringRef FormatStrRef;
+    size_t StrLen;
+    if (SourceSize &&
+        ProcessFormatStringLiteral(FormatExpr, FormatStrRef, StrLen, Context)) {
+      EstimateSizeFormatHandler H(FormatStrRef);
+      const char *FormatBytes = FormatStrRef.data();
+      if (!analyze_format_string::ParsePrintfString(
+              H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
+              Context.getTargetInfo(), /*isFreeBSDKPrintf=*/false)) {
+        llvm::APSInt FormatSize =
+            llvm::APSInt::getUnsigned(H.getSizeLowerBound())
+                .extOrTrunc(SizeTypeWidth);
+        if (FormatSize > *SourceSize && *SourceSize != 0) {
+          DiagID = diag::warn_fortify_source_format_truncation;
+          DestinationSize = SourceSize;
+          SourceSize = FormatSize;
+          break;
+        }
+      }
+    }
     DestinationSize = ComputeSizeArgument(0);
-    break;
   }
   }
 

diff  --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index 9199b3510516e0..b7906d201e4fad 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -229,6 +229,7 @@ void testTaintSystemCall2(void) {
   char addr[128];
   scanf("%s", addr);
   __builtin_snprintf(buffern, 10, "/bin/mail %s < /tmp/email", addr);
+  // expected-warning at -1 {{'snprintf' will always be truncated; specified size is 10, but format string expands to at least 24}}
   system(buffern); // expected-warning {{Untrusted data is passed to a system call}}
 }
 

diff  --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index 56d3056d55756d..5d7771d4f57e45 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -202,7 +202,8 @@ void check_invalid_specifier(FILE* fp, char *buf)
   printf("%s%lv%d","unix",10,20); // expected-warning {{invalid conversion specifier 'v'}} expected-warning {{data argument not used by format string}}
   fprintf(fp,"%%%l"); // expected-warning {{incomplete format specifier}}
   sprintf(buf,"%%%%%ld%d%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
-  snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} expected-warning {{invalid conversion specifier ';'}} expected-warning {{data argument not used by format string}}
+  snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} expected-warning {{invalid conversion specifier ';'}} expected-warning {{data argument not used by format string}} \
+                                            // expected-warning{{'snprintf' will always be truncated; specified size is 2, but format string expands to at least 7}}
 }
 
 void check_null_char_string(char* b)

diff  --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index 91c204dd9461ac..5ed9782b26fb78 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -2,6 +2,8 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify -DUSE_BUILTINS
 // RUN: %clang_cc1 -xc++ -triple x86_64-apple-macosx10.14.0 %s -verify
 // RUN: %clang_cc1 -xc++ -triple x86_64-apple-macosx10.14.0 %s -verify -DUSE_BUILTINS
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 -Wno-fortify-source %s -verify=nofortify
+// RUN: %clang_cc1 -xc++ -triple x86_64-apple-macosx10.14.0 -Wno-fortify-source %s -verify=nofortify
 
 typedef unsigned long size_t;
 
@@ -83,10 +85,17 @@ void call_memset(void) {
   __builtin_memset(buf, 0xff, 11); // expected-warning {{'memset' will always overflow; destination buffer has size 10, but size argument is 11}}
 }
 
-void call_snprintf(void) {
+void call_snprintf(double d) {
   char buf[10];
   __builtin_snprintf(buf, 10, "merp");
   __builtin_snprintf(buf, 11, "merp"); // expected-warning {{'snprintf' size argument is too large; destination buffer has size 10, but size argument is 11}}
+  __builtin_snprintf(buf, 0, "merp");
+  __builtin_snprintf(buf, 3, "merp"); // expected-warning {{'snprintf' will always be truncated; specified size is 3, but format string expands to at least 5}}
+  __builtin_snprintf(buf, 4, "merp"); // expected-warning {{'snprintf' will always be truncated; specified size is 4, but format string expands to at least 5}}
+  __builtin_snprintf(buf, 5, "merp");
+  __builtin_snprintf(buf, 1, "%.1000g", d); // expected-warning {{'snprintf' will always be truncated; specified size is 1, but format string expands to at least 2}}
+  __builtin_snprintf(buf, 5, "%.1000g", d);
+  __builtin_snprintf(buf, 5, "%.1000G", d);
 }
 
 void call_vsnprintf(void) {
@@ -94,14 +103,23 @@ void call_vsnprintf(void) {
   __builtin_va_list list;
   __builtin_vsnprintf(buf, 10, "merp", list);
   __builtin_vsnprintf(buf, 11, "merp", list); // expected-warning {{'vsnprintf' size argument is too large; destination buffer has size 10, but size argument is 11}}
+  __builtin_vsnprintf(buf, 0, "merp", list);
+  __builtin_vsnprintf(buf, 3, "merp", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 3, but format string expands to at least 5}}
+  __builtin_vsnprintf(buf, 4, "merp", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 4, but format string expands to at least 5}}
+  __builtin_vsnprintf(buf, 5, "merp", list);
+  __builtin_vsnprintf(buf, 1, "%.1000g", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 1, but format string expands to at least 2}}
+  __builtin_vsnprintf(buf, 5, "%.1000g", list);
+  __builtin_vsnprintf(buf, 5, "%.1000G", list);
 }
 
 void call_sprintf_chk(char *buf) {
   __builtin___sprintf_chk(buf, 1, 6, "hell\n");
   __builtin___sprintf_chk(buf, 1, 5, "hell\n");     // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}}
-  __builtin___sprintf_chk(buf, 1, 6, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}}
-  __builtin___sprintf_chk(buf, 1, 2, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}}
-  // expected-warning at -1 {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 5}}
+  __builtin___sprintf_chk(buf, 1, 6, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}} \
+                                                    // nofortify-warning {{format string contains '\0' within the string body}}
+  __builtin___sprintf_chk(buf, 1, 2, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}} \
+                                                    // nofortify-warning {{format string contains '\0' within the string body}}
+  // expected-warning at -2 {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 5}}
   __builtin___sprintf_chk(buf, 1, 6, "hello");
   __builtin___sprintf_chk(buf, 1, 5, "hello"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}}
   __builtin___sprintf_chk(buf, 1, 2, "%c", '9');
@@ -150,9 +168,11 @@ void call_sprintf_chk(char *buf) {
 
 void call_sprintf(void) {
   char buf[6];
-  sprintf(buf, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}}
-  sprintf(buf, "hello b\0y"); // expected-warning {{format string contains '\0' within the string body}}
-  // expected-warning at -1 {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}}
+  sprintf(buf, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}} \
+                              // nofortify-warning {{format string contains '\0' within the string body}}
+  sprintf(buf, "hello b\0y"); // expected-warning {{format string contains '\0' within the string body}} \
+                              // nofortify-warning {{format string contains '\0' within the string body}}
+  // expected-warning at -2 {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}}
   sprintf(buf, "hello");
   sprintf(buf, "hello!"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
   sprintf(buf, "1234%%");


        


More information about the cfe-commits mailing list