[clang] 15e3d39 - [clang] Fortify warning for scanf calls with field width too big.

Michael Benfield via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 27 20:00:31 PDT 2021


Author: Michael Benfield
Date: 2021-10-28T02:52:03Z
New Revision: 15e3d39110fa4449be4f56196af3bc81b623f3ab

URL: https://github.com/llvm/llvm-project/commit/15e3d39110fa4449be4f56196af3bc81b623f3ab
DIFF: https://github.com/llvm/llvm-project/commit/15e3d39110fa4449be4f56196af3bc81b623f3ab.diff

LOG: [clang] Fortify warning for scanf calls with field width too big.

Differential Revision: https://reviews.llvm.org/D111833

Added: 
    clang/test/Sema/warn-fortify-scanf.c

Modified: 
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/Sema/SemaChecking.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 09482d238ff3..920146f71bd8 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -833,6 +833,10 @@ def warn_fortify_source_format_overflow : Warning<
   " but format string expands to at least %2">,
   InGroup<FortifySource>;
 
+def warn_fortify_scanf_overflow : Warning<
+  "'%0' may overflow; destination buffer in argument %1 has size "
+  "%2, but the corresponding field width plus NUL byte is %3">,
+  InGroup<FortifySource>;
 
 /// main()
 // static main() is not an error in C, just in C++.

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 147f50aeed97..e11966020ae9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -408,6 +408,50 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) {
 
 namespace {
 
+class ScanfDiagnosticFormatHandler
+    : public analyze_format_string::FormatStringHandler {
+  // Accepts the argument index (relative to the first destination index) of the
+  // argument whose size we want.
+  using ComputeSizeFunction =
+      llvm::function_ref<Optional<llvm::APSInt>(unsigned)>;
+
+  // Accepts the argument index (relative to the first destination index), the
+  // destination size, and the source size).
+  using DiagnoseFunction =
+      llvm::function_ref<void(unsigned, unsigned, unsigned)>;
+
+  ComputeSizeFunction ComputeSizeArgument;
+  DiagnoseFunction Diagnose;
+
+public:
+  ScanfDiagnosticFormatHandler(ComputeSizeFunction ComputeSizeArgument,
+                               DiagnoseFunction Diagnose)
+      : ComputeSizeArgument(ComputeSizeArgument), Diagnose(Diagnose) {}
+
+  bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+                            const char *StartSpecifier,
+                            unsigned specifierLen) override {
+    auto OptionalFW = FS.getFieldWidth();
+    if (OptionalFW.getHowSpecified() !=
+        analyze_format_string::OptionalAmount::HowSpecified::Constant)
+      return true;
+
+    // We have to write the data plus a NUL byte.
+    unsigned SourceSize = OptionalFW.getConstantAmount() + 1;
+
+    auto DestSizeAPS = ComputeSizeArgument(FS.getArgIndex());
+    if (!DestSizeAPS)
+      return true;
+
+    unsigned DestSize = DestSizeAPS->getZExtValue();
+
+    if (DestSize < SourceSize)
+      Diagnose(FS.getArgIndex(), DestSize, SourceSize);
+
+    return true;
+  }
+};
+
 class EstimateSizeFormatHandler
     : public analyze_format_string::FormatStringHandler {
   size_t Size;
@@ -615,9 +659,12 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     // (potentially) more strict checking mode. Otherwise, conservatively assume
     // type 0.
     int BOSType = 0;
-    if (const auto *POS =
-            FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>())
-      BOSType = POS->getType();
+    // This check can fail for variadic functions.
+    if (Index < FD->getNumParams()) {
+      if (const auto *POS =
+              FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>())
+        BOSType = POS->getType();
+    }
 
     const Expr *ObjArg = TheCall->getArg(Index);
     uint64_t Result;
@@ -642,6 +689,20 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
   unsigned DiagID = 0;
   bool IsChkVariant = false;
 
+  auto GetFunctionName = [&]() {
+    StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
+    // Skim off the details of whichever builtin was called to produce a better
+    // diagnostic, as it's unlikely that the user wrote the __builtin
+    // explicitly.
+    if (IsChkVariant) {
+      FunctionName = FunctionName.drop_front(std::strlen("__builtin___"));
+      FunctionName = FunctionName.drop_back(std::strlen("_chk"));
+    } else if (FunctionName.startswith("__builtin_")) {
+      FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
+    }
+    return FunctionName;
+  };
+
   switch (BuiltinID) {
   default:
     return;
@@ -661,6 +722,61 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     break;
   }
 
+  case Builtin::BIscanf:
+  case Builtin::BIfscanf:
+  case Builtin::BIsscanf: {
+    unsigned FormatIndex = 1;
+    unsigned DataIndex = 2;
+    if (BuiltinID == Builtin::BIscanf) {
+      FormatIndex = 0;
+      DataIndex = 1;
+    }
+
+    const auto *FormatExpr =
+        TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
+
+    const auto *Format = dyn_cast<StringLiteral>(FormatExpr);
+    if (!Format)
+      return;
+
+    if (!Format->isAscii() && !Format->isUTF8())
+      return;
+
+    auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize,
+                        unsigned SourceSize) {
+      DiagID = diag::warn_fortify_scanf_overflow;
+      StringRef FunctionName = GetFunctionName();
+      DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+                          PDiag(DiagID)
+                              << FunctionName << (ArgIndex + DataIndex + 1)
+                              << DestSize << SourceSize);
+    };
+
+    StringRef FormatStrRef = Format->getString();
+    auto ShiftedComputeSizeArgument = [&](unsigned Index) {
+      return ComputeSizeArgument(Index + DataIndex);
+    };
+    ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose);
+    const char *FormatBytes = FormatStrRef.data();
+    const ConstantArrayType *T =
+        Context.getAsConstantArrayType(Format->getType());
+    assert(T && "String literal not of constant array type!");
+    size_t TypeSize = T->getSize().getZExtValue();
+
+    // In case there's a null byte somewhere.
+    size_t StrLen =
+        std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
+
+    analyze_format_string::ParseScanfString(H, FormatBytes,
+                                            FormatBytes + StrLen, getLangOpts(),
+                                            Context.getTargetInfo());
+
+    // Unlike the other cases, in this one we have already issued the diagnostic
+    // here, so no need to continue (because unlike the other cases, here the
+    // diagnostic refers to the argument number).
+    return;
+  }
+
   case Builtin::BIsprintf:
   case Builtin::BI__builtin___sprintf_chk: {
     size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
@@ -771,15 +887,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
       SourceSize.getValue().ule(DestinationSize.getValue()))
     return;
 
-  StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
-  // Skim off the details of whichever builtin was called to produce a better
-  // diagnostic, as it's unlikely that the user wrote the __builtin explicitly.
-  if (IsChkVariant) {
-    FunctionName = FunctionName.drop_front(std::strlen("__builtin___"));
-    FunctionName = FunctionName.drop_back(std::strlen("_chk"));
-  } else if (FunctionName.startswith("__builtin_")) {
-    FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
-  }
+  StringRef FunctionName = GetFunctionName();
 
   SmallString<16> DestinationStr;
   SmallString<16> SourceStr;

diff  --git a/clang/test/Sema/warn-fortify-scanf.c b/clang/test/Sema/warn-fortify-scanf.c
new file mode 100644
index 000000000000..99a3273bec3f
--- /dev/null
+++ b/clang/test/Sema/warn-fortify-scanf.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify
+
+typedef struct _FILE FILE;
+extern int scanf(const char *format, ...);
+extern int fscanf(FILE *f, const char *format, ...);
+extern int sscanf(const char *input, const char *format, ...);
+
+void call_scanf() {
+  char buf10[10];
+  char buf20[20];
+  char buf30[30];
+  scanf("%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding field width plus NUL byte is 11}}
+  scanf("%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding field width plus NUL byte is 12}}
+  scanf("%4s %5s %9s", buf20, buf30, buf10);
+  scanf("%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding field width plus NUL byte is 21}}
+  scanf("%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding field width plus NUL byte is 22}}
+  scanf("%19s %5s %9s", buf20, buf30, buf10);
+  scanf("%19s %29s %9s", buf20, buf30, buf10);
+}
+
+void call_sscanf() {
+  char buf10[10];
+  char buf20[20];
+  char buf30[30];
+  sscanf("a b c", "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 11}}
+  sscanf("a b c", "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 12}}
+  sscanf("a b c", "%4s %5s %9s", buf20, buf30, buf10);
+  sscanf("a b c", "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 21}}
+  sscanf("a b c", "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 22}}
+  sscanf("a b c", "%19s %5s %9s", buf20, buf30, buf10);
+  sscanf("a b c", "%19s %29s %9s", buf20, buf30, buf10);
+}
+
+void call_fscanf() {
+  char buf10[10];
+  char buf20[20];
+  char buf30[30];
+  fscanf(0, "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 11}}
+  fscanf(0, "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding field width plus NUL byte is 12}}
+  fscanf(0, "%4s %5s %9s", buf20, buf30, buf10);
+  fscanf(0, "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 21}}
+  fscanf(0, "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding field width plus NUL byte is 22}}
+  fscanf(0, "%19s %5s %9s", buf20, buf30, buf10);
+  fscanf(0, "%19s %29s %9s", buf20, buf30, buf10);
+}


        


More information about the cfe-commits mailing list