[clang] [clang][analyzer] Model more wide-character versions of string functions (PR #113908)

via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 28 06:17:25 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Arseniy Zaostrovnykh (necto)

<details>
<summary>Changes</summary>

Model:
- `wcscpy`
- `wcsncpy`
- `wcscat`
- `wcsncat`
- `swprintf`
- `wmemset`
- `wcscmp` (partially)
- `wcsncmp` (partially)

All models draw from their regular-char counterparts.

Additionally, `sprintf`, `snprintf`, and `swprintf` now report `nullptr` passed as the destination buffer.

CPP-5751

---

Patch is 77.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113908.diff


4 Files Affected:

- (modified) clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (+270-124) 
- (modified) clang/test/Analysis/string.cpp (+4) 
- (added) clang/test/Analysis/wstring-suppress-oob.c (+160) 
- (modified) clang/test/Analysis/wstring.c (+876-9) 


``````````diff
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 21a2d8828249d1..fce958a3fd3698 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InterCheckerAPI.h"
+#include "clang/AST/CharUnits.h"
 #include "clang/AST/OperationKinds.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/CharInfo.h"
@@ -128,26 +129,39 @@ class CStringChecker : public Checker< eval::Call,
   using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
                                      const CallEvent &)>;
 
+  template <typename Handler> static FnCheck forRegularChars(Handler fn) {
+    return [fn](const CStringChecker *Checker, CheckerContext &C,
+                const CallEvent &Call) { (Checker->*fn)(C, Call, CK_Regular); };
+  }
+
+  template <typename Handler> static FnCheck forWideChars(Handler fn) {
+    return [fn](const CStringChecker *Checker, CheckerContext &C,
+                const CallEvent &Call) { (Checker->*fn)(C, Call, CK_Wide); };
+  }
+
   CallDescriptionMap<FnCheck> Callbacks = {
       {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
-       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
+       forRegularChars(&CStringChecker::evalMemcpy)},
       {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
-       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
+       forWideChars(&CStringChecker::evalMemcpy)},
       {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
-       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
+       forRegularChars(&CStringChecker::evalMempcpy)},
       {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
-       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
+       forWideChars(&CStringChecker::evalMempcpy)},
       {{CDM::CLibrary, {"memcmp"}, 3},
-       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
+       forRegularChars(&CStringChecker::evalMemcmp)},
       {{CDM::CLibrary, {"wmemcmp"}, 3},
-       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
+       forWideChars(&CStringChecker::evalMemcmp)},
       {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
-       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
+       forRegularChars(&CStringChecker::evalMemmove)},
       {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
-       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
+       forWideChars(&CStringChecker::evalMemmove)},
       {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
-       &CStringChecker::evalMemset},
-      {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
+       forRegularChars(&CStringChecker::evalMemset)},
+      {{CDM::CLibraryMaybeHardened, {"wmemset"}, 3},
+       forWideChars(&CStringChecker::evalMemset)},
+      {{CDM::CLibrary, {"explicit_memset"}, 3},
+       forRegularChars(&CStringChecker::evalMemset)},
       // FIXME: C23 introduces 'memset_explicit', maybe also model that
       {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
        &CStringChecker::evalStrcpy},
@@ -157,26 +171,40 @@ class CStringChecker : public Checker< eval::Call,
        &CStringChecker::evalStpcpy},
       {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
        &CStringChecker::evalStrlcpy},
+      {{CDM::CLibraryMaybeHardened, {"wcscpy"}, 2},
+       &CStringChecker::evalWcscpy},
+      {{CDM::CLibraryMaybeHardened, {"wcsncpy"}, 3},
+       &CStringChecker::evalWcsncpy},
       {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
        &CStringChecker::evalStrcat},
       {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
        &CStringChecker::evalStrncat},
       {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
        &CStringChecker::evalStrlcat},
+      {{CDM::CLibraryMaybeHardened, {"wcscat"}, 2},
+       &CStringChecker::evalWcscat},
+      {{CDM::CLibraryMaybeHardened, {"wcsncat"}, 3},
+       &CStringChecker::evalWcsncat},
       {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
        &CStringChecker::evalstrLength},
       {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
       {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
        &CStringChecker::evalstrnLength},
       {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
-      {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
-      {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
+      {{CDM::CLibrary, {"strcmp"}, 2},
+       forRegularChars(&CStringChecker::evalStrcmp)},
+      {{CDM::CLibrary, {"wcscmp"}, 2},
+       forWideChars(&CStringChecker::evalStrcmp)},
+      {{CDM::CLibrary, {"strncmp"}, 3},
+       forRegularChars(&CStringChecker::evalStrncmp)},
+      {{CDM::CLibrary, {"wcsncmp"}, 3},
+       forWideChars(&CStringChecker::evalStrncmp)},
       {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
       {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
       {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
       {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
       {{CDM::CLibrary, {"bcmp"}, 3},
-       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
+       forRegularChars(&CStringChecker::evalMemcmp)},
       {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
       {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
        &CStringChecker::evalBzero},
@@ -191,6 +219,8 @@ class CStringChecker : public Checker< eval::Call,
        &CStringChecker::evalSprintf},
       {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
        &CStringChecker::evalSnprintf},
+      {{CDM::CLibraryMaybeHardened, {"swprintf"}, std::nullopt, 3},
+       &CStringChecker::evalSwprintf},
   };
 
   // These require a bit of special handling.
@@ -218,19 +248,23 @@ class CStringChecker : public Checker< eval::Call,
   void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
   void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
   void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
+  void evalWcscpy(CheckerContext &C, const CallEvent &Call) const;
+  void evalWcsncpy(CheckerContext &C, const CallEvent &Call) const;
   void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
                         bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
-                        bool returnPtr = true) const;
+                        CharKind CK, bool returnPtr = true) const;
 
   void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
   void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
   void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
+  void evalWcscat(CheckerContext &C, const CallEvent &Call) const;
+  void evalWcsncat(CheckerContext &C, const CallEvent &Call) const;
 
-  void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
-  void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
+  void evalStrncmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
   void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
   void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
-  void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
+  void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call, CharKind CK,
                         bool IsBounded = false, bool IgnoreCase = false) const;
 
   void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
@@ -238,19 +272,23 @@ class CStringChecker : public Checker< eval::Call,
   void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
   void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
   void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
-  void evalMemset(CheckerContext &C, const CallEvent &Call) const;
+  void evalMemset(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
   void evalBzero(CheckerContext &C, const CallEvent &Call) const;
 
   void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
   void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
+  void evalSwprintf(CheckerContext &C, const CallEvent &Call) const;
   void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
-                         bool IsBounded) const;
+                         bool IsBounded, CharKind CK) const;
 
   // Utility methods
   std::pair<ProgramStateRef , ProgramStateRef >
   static assumeZero(CheckerContext &C,
                     ProgramStateRef state, SVal V, QualType Ty);
 
+  static std::pair<ProgramStateRef, ProgramStateRef>
+  assumeSizeZero(CheckerContext &C, ProgramStateRef State, const Expr *Size);
+
   static ProgramStateRef setCStringLength(ProgramStateRef state,
                                               const MemRegion *MR,
                                               SVal strLength);
@@ -270,11 +308,12 @@ class CStringChecker : public Checker< eval::Call,
                                          const Expr *expr,
                                          SVal val) const;
 
+  /// SizeVBytes must be in bytes.
   /// Invalidate the destination buffer determined by characters copied.
   static ProgramStateRef
   invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
-                                    const Expr *BufE, SVal BufV, SVal SizeV,
-                                    QualType SizeTy);
+                                    const Expr *BufE, SVal BufV,
+                                    SVal SizeVBytes, QualType SizeTy);
 
   /// Operation never overflows, do not invalidate the super region.
   static ProgramStateRef invalidateDestinationBufferNeverOverflows(
@@ -302,8 +341,8 @@ class CStringChecker : public Checker< eval::Call,
   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
                               const MemRegion *MR);
 
-  static bool memsetAux(const Expr *DstBuffer, SVal CharE,
-                        const Expr *Size, CheckerContext &C,
+  static bool memsetAux(const Expr *DstBuffer, SVal CharValUnsigned,
+                        const Expr *Size, CharUnits UnitSize, CheckerContext &C,
                         ProgramStateRef &State);
 
   // Re-usable checks
@@ -315,20 +354,15 @@ class CStringChecker : public Checker< eval::Call,
                             AnyArgExpr Buffer, SVal Element, SVal Size) const;
   ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
                                 AnyArgExpr Buffer, SVal Element,
-                                AccessKind Access,
-                                CharKind CK = CharKind::Regular) const;
+                                AccessKind Access, CharKind CK) const;
   ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
                                     AnyArgExpr Buffer, SizeArgExpr Size,
-                                    AccessKind Access,
-                                    CharKind CK = CharKind::Regular) const;
+                                    AccessKind Access, CharKind CK) const;
   ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
                                SizeArgExpr Size, AnyArgExpr First,
-                               AnyArgExpr Second,
-                               CharKind CK = CharKind::Regular) const;
-  void emitOverlapBug(CheckerContext &C,
-                      ProgramStateRef state,
-                      const Stmt *First,
-                      const Stmt *Second) const;
+                               AnyArgExpr Second, CharKind CK) const;
+  void emitOverlapBug(CheckerContext &C, ProgramStateRef state,
+                      const Stmt *First, const Stmt *Second) const;
 
   void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
                       StringRef WarningMsg) const;
@@ -351,6 +385,12 @@ class CStringChecker : public Checker< eval::Call,
   static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
                                 SVal BufVal, QualType BufTy, SVal LengthVal,
                                 QualType LengthTy);
+
+  /// For the sprintf family of functions, the "dest" argument is allowed
+  /// to be null if it is a bounded sprintf function and the bound is 0.
+  /// Check this condition.
+  bool shouldCheckDestForNull(bool IsBounded, const CallEvent &Call,
+                              ProgramStateRef State, CheckerContext &C) const;
 };
 
 } //end anonymous namespace
@@ -373,6 +413,12 @@ CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
   return State->assume(svalBuilder.evalEQ(State, *val, zero));
 }
 
+std::pair<ProgramStateRef, ProgramStateRef>
+CStringChecker::assumeSizeZero(CheckerContext &C, ProgramStateRef State,
+                               const Expr *Size) {
+  return assumeZero(C, State, C.getSVal(Size), Size->getType());
+}
+
 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
                                              ProgramStateRef State,
                                              AnyArgExpr Arg, SVal l) const {
@@ -1149,12 +1195,12 @@ const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
 
 bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
                                        SVal BufVal, QualType BufTy,
-                                       SVal LengthVal, QualType LengthTy) {
+                                       SVal LengthValBytes, QualType LengthTy) {
   // If we do not know that the buffer is long enough we return 'true'.
   // Otherwise the parent region of this field region would also get
   // invalidated, which would lead to warnings based on an unknown state.
 
-  if (LengthVal.isUnknown())
+  if (LengthValBytes.isUnknown())
     return false;
 
   // Originally copied from CheckBufferAccess and CheckLocation.
@@ -1163,7 +1209,7 @@ bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
 
   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
 
-  std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
+  std::optional<NonLoc> Length = LengthValBytes.getAs<NonLoc>();
   if (!Length)
     return true; // cf top comment.
 
@@ -1210,14 +1256,14 @@ bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
 
 ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
     CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
-    SVal SizeV, QualType SizeTy) {
+    SVal SizeVBytes, QualType SizeTy) {
   auto InvalidationTraitOperations =
-      [&C, S, BufTy = BufE->getType(), BufV, SizeV,
+      [&C, S, BufTy = BufE->getType(), BufV, SizeVBytes,
        SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
         // If destination buffer is a field region and access is in bound, do
         // not invalidate its super region.
         if (MemRegion::FieldRegionKind == R->getKind() &&
-            isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
+            isFirstBufInBound(C, S, BufV, BufTy, SizeVBytes, SizeTy)) {
           ITraits.setTrait(
               R,
               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
@@ -1347,9 +1393,10 @@ bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
   }
 }
 
-bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
-                               const Expr *Size, CheckerContext &C,
-                               ProgramStateRef &State) {
+bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharValCast,
+                               const Expr *Size, CharUnits UnitSize,
+                               CheckerContext &C, ProgramStateRef &State) {
+
   SVal MemVal = C.getSVal(DstBuffer);
   SVal SizeVal = C.getSVal(Size);
   const MemRegion *MR = MemVal.getAsRegion();
@@ -1368,26 +1415,30 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
     return false;
 
   SValBuilder &svalBuilder = C.getSValBuilder();
+
+  auto SizeInChars =
+      svalBuilder
+          .evalBinOp(State, BO_Mul, *SizeNL,
+                     svalBuilder.makeArrayIndex(UnitSize.getQuantity()),
+                     svalBuilder.getArrayIndexType())
+          .castAs<NonLoc>();
+
   ASTContext &Ctx = C.getASTContext();
 
   // void *memset(void *dest, int ch, size_t count);
+  // wchar_t *wmemset(wchar_t *s, wchar_t c, size_t n);
   // For now we can only handle the case of offset is 0 and concrete char value.
   if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
       Offset.getOffset() == 0) {
     // Get the base region's size.
     DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
 
-    ProgramStateRef StateWholeReg, StateNotWholeReg;
-    std::tie(StateWholeReg, StateNotWholeReg) =
-        State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
-
-    // With the semantic of 'memset()', we should convert the CharVal to
-    // unsigned char.
-    CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
+    auto [StateWholeReg, StateNotWholeReg] =
+        State->assume(svalBuilder.evalEQ(State, SizeDV, SizeInChars));
 
     ProgramStateRef StateNullChar, StateNonNullChar;
     std::tie(StateNullChar, StateNonNullChar) =
-        assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
+        assumeZero(C, State, CharValCast, Ctx.UnsignedCharTy);
 
     if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
         !StateNonNullChar) {
@@ -1403,7 +1454,7 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
       // If the destination buffer's extent is not equal to the value of
       // third argument, just invalidate buffer.
       State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
-                                                SizeVal, Size->getType());
+                                                SizeInChars, Size->getType());
     }
 
     if (StateNullChar && !StateNonNullChar) {
@@ -1418,8 +1469,10 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
 
       // If the value of second argument is not zero, then the string length
       // is at least the size argument.
+      // Using SizeNL here and not SizeInBytes, because strlen and wcslen
+      // in respective units and not in bytes.
       SVal NewStrLenGESize = svalBuilder.evalBinOp(
-          State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
+          State, BO_GE, NewStrLen, *SizeNL, svalBuilder.getConditionType());
 
       State = setCStringLength(
           State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
@@ -1429,7 +1482,7 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
     // If the offset is not zero and char value is not concrete, we can do
     // nothing but invalidate the buffer.
     State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
-                                              SizeVal, Size->getType());
+                                              SizeInChars, Size->getType());
   }
   return true;
 }
@@ -1448,11 +1501,8 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
   // See if the size argument is zero.
   const LocationContext *LCtx = C.getLocationContext();
   SVal sizeVal = state->getSVal(Size.Expression, LCtx);
-  QualType sizeTy = Size.Expression->getType();
-
-  ProgramStateRef stateZeroSize, stateNonZeroSize;
-  std::tie(stateZeroSize, stateNonZeroSize) =
-      assumeZero(C, state, sizeVal, sizeTy);
+  auto [stateZeroSize, stateNonZeroSize] =
+      assumeSizeZero(C, state, Size.Expression);
 
   // Get the value of the Dest.
   SVal destVal = state->getSVal(Dest.Expression, LCtx);
@@ -1610,13 +1660,8 @@ void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
   SValBuilder &Builder = C.getSValBuilder();
   const LocationContext *LCtx = C.getLocationContext();
 
-  // See if the size argument is zero.
-  SVal sizeVal = State->getSVal(Size.Expression, LCtx);
-  QualType sizeTy = Size.Expression->getType();...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/113908


More information about the cfe-commits mailing list