[clang] [analyzer] Model overflow builtins (PR #102602)

Pavel Skripkin via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 9 14:09:30 PDT 2024


https://github.com/pskrgag updated https://github.com/llvm/llvm-project/pull/102602

>From 3e0fcffa8fbea5f89ab927fd897c451bcafd8e5e Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin at gmail.com>
Date: Fri, 9 Aug 2024 14:37:47 +0300
Subject: [PATCH 1/2] clang/csa: add initial support for builtin overflow

---
 .../Checkers/BuiltinFunctionChecker.cpp       | 124 +++++++++++++++++-
 clang/test/Analysis/builtin_overflow.c        |  65 +++++++++
 .../test/Analysis/out-of-bounds-diagnostics.c |  17 +++
 3 files changed, 204 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Analysis/builtin_overflow.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index b198b1c2ff4d11..0c8b9fa3d947b0 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -21,16 +21,67 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
 
 using namespace clang;
 using namespace ento;
 
 namespace {
 
+QualType getOverflowBuiltinResultType(const CallEvent &Call) {
+  assert(Call.getNumArgs() == 3);
+
+  return Call.getArgExpr(2)->getType()->getPointeeType();
+}
+
+QualType getOverflowBuiltinResultType(const CallEvent &Call, CheckerContext &C,
+                                      unsigned BI) {
+  assert(Call.getNumArgs() == 3);
+
+  ASTContext &Ast = C.getASTContext();
+
+  switch (BI) {
+  case Builtin::BI__builtin_smul_overflow:
+  case Builtin::BI__builtin_ssub_overflow:
+  case Builtin::BI__builtin_sadd_overflow:
+    return Ast.IntTy;
+  case Builtin::BI__builtin_smull_overflow:
+  case Builtin::BI__builtin_ssubl_overflow:
+  case Builtin::BI__builtin_saddl_overflow:
+    return Ast.LongTy;
+  case Builtin::BI__builtin_smulll_overflow:
+  case Builtin::BI__builtin_ssubll_overflow:
+  case Builtin::BI__builtin_saddll_overflow:
+    return Ast.LongLongTy;
+  case Builtin::BI__builtin_umul_overflow:
+  case Builtin::BI__builtin_usub_overflow:
+  case Builtin::BI__builtin_uadd_overflow:
+    return Ast.UnsignedIntTy;
+  case Builtin::BI__builtin_umull_overflow:
+  case Builtin::BI__builtin_usubl_overflow:
+  case Builtin::BI__builtin_uaddl_overflow:
+    return Ast.UnsignedLongTy;
+  case Builtin::BI__builtin_umulll_overflow:
+  case Builtin::BI__builtin_usubll_overflow:
+  case Builtin::BI__builtin_uaddll_overflow:
+    return Ast.UnsignedLongLongTy;
+  case Builtin::BI__builtin_mul_overflow:
+  case Builtin::BI__builtin_sub_overflow:
+  case Builtin::BI__builtin_add_overflow:
+    return getOverflowBuiltinResultType(Call);
+  default:
+    assert(false && "Unknown overflow builtin");
+  }
+}
+
 class BuiltinFunctionChecker : public Checker<eval::Call> {
 public:
   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
+  void HandleOverflowBuiltin(const CallEvent &Call, CheckerContext &C,
+                             BinaryOperator::Opcode Op,
+                             QualType ResultType) const;
 
 private:
   // From: clang/include/clang/Basic/Builtins.def
@@ -50,6 +101,44 @@ class BuiltinFunctionChecker : public Checker<eval::Call> {
 
 } // namespace
 
+void BuiltinFunctionChecker::HandleOverflowBuiltin(const CallEvent &Call,
+                                                   CheckerContext &C,
+                                                   BinaryOperator::Opcode Op,
+                                                   QualType ResultType) const {
+  // All __builtin_*_overflow functions take 3 argumets.
+  assert(Call.getNumArgs() == 3);
+
+  ProgramStateRef State = C.getState();
+  SValBuilder &SVB = C.getSValBuilder();
+  const Expr *CE = Call.getOriginExpr();
+
+  SVal Arg1 = Call.getArgSVal(0);
+  SVal Arg2 = Call.getArgSVal(1);
+
+  SVal RetVal = SVB.evalBinOp(State, Op, Arg1, Arg2, ResultType);
+
+  // TODO: Handle overflows with values that known to overflow. Like INT_MAX + 1
+  // should not produce state for non-overflow case and threat it as
+  // unreachable.
+
+  // Handle non-overflow case.
+  {
+    ProgramStateRef StateSuccess =
+        State->BindExpr(CE, C.getLocationContext(), SVB.makeTruthVal(false));
+
+    if (auto L = Call.getArgSVal(2).getAs<Loc>())
+      StateSuccess = StateSuccess->bindLoc(*L, RetVal, C.getLocationContext());
+
+    C.addTransition(StateSuccess);
+  }
+
+  // Handle overflow case.
+  {
+    C.addTransition(
+        State->BindExpr(CE, C.getLocationContext(), SVB.makeTruthVal(true)));
+  }
+}
+
 bool BuiltinFunctionChecker::isBuiltinLikeFunction(
     const CallEvent &Call) const {
   const auto *FD = llvm::dyn_cast_or_null<FunctionDecl>(Call.getDecl());
@@ -82,10 +171,41 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
     return true;
   }
 
-  switch (FD->getBuiltinID()) {
+  unsigned BI = FD->getBuiltinID();
+
+  switch (BI) {
   default:
     return false;
-
+  case Builtin::BI__builtin_mul_overflow:
+  case Builtin::BI__builtin_smul_overflow:
+  case Builtin::BI__builtin_smull_overflow:
+  case Builtin::BI__builtin_smulll_overflow:
+  case Builtin::BI__builtin_umul_overflow:
+  case Builtin::BI__builtin_umull_overflow:
+  case Builtin::BI__builtin_umulll_overflow:
+    HandleOverflowBuiltin(Call, C, BO_Mul,
+                          getOverflowBuiltinResultType(Call, C, BI));
+    return true;
+  case Builtin::BI__builtin_sub_overflow:
+  case Builtin::BI__builtin_ssub_overflow:
+  case Builtin::BI__builtin_ssubl_overflow:
+  case Builtin::BI__builtin_ssubll_overflow:
+  case Builtin::BI__builtin_usub_overflow:
+  case Builtin::BI__builtin_usubl_overflow:
+  case Builtin::BI__builtin_usubll_overflow:
+    HandleOverflowBuiltin(Call, C, BO_Sub,
+                          getOverflowBuiltinResultType(Call, C, BI));
+    return true;
+  case Builtin::BI__builtin_add_overflow:
+  case Builtin::BI__builtin_sadd_overflow:
+  case Builtin::BI__builtin_saddl_overflow:
+  case Builtin::BI__builtin_saddll_overflow:
+  case Builtin::BI__builtin_uadd_overflow:
+  case Builtin::BI__builtin_uaddl_overflow:
+  case Builtin::BI__builtin_uaddll_overflow:
+    HandleOverflowBuiltin(Call, C, BO_Add,
+                          getOverflowBuiltinResultType(Call, C, BI));
+    return true;
   case Builtin::BI__builtin_assume:
   case Builtin::BI__assume: {
     assert (Call.getNumArgs() > 0);
diff --git a/clang/test/Analysis/builtin_overflow.c b/clang/test/Analysis/builtin_overflow.c
new file mode 100644
index 00000000000000..3b8639aa450046
--- /dev/null
+++ b/clang/test/Analysis/builtin_overflow.c
@@ -0,0 +1,65 @@
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-unknown -verify %s \
+// RUN:   -analyzer-checker=core,debug.ExprInspection
+
+#define NULL ((void *)0)
+#define INT_MAX __INT_MAX__
+
+void clang_analyzer_dump_int(int);
+
+void test1(void)
+{
+   int res;
+
+   if (__builtin_add_overflow(10, 20, &res)) {
+     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+     return;
+   }
+
+   clang_analyzer_dump_int(res); //expected-warning{{30}}
+}
+
+void test2(void)
+{
+   int res;
+
+   __builtin_add_overflow(10, 20, &res);
+   clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}} expected-warning{{S32b}}
+}
+
+void test3(void)
+{
+   int res;
+
+   if (__builtin_sub_overflow(10, 20, &res)) {
+     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+     return;
+   }
+
+   clang_analyzer_dump_int(res); //expected-warning{{-10}}
+}
+
+void test4(void)
+{
+   int res;
+
+   if (__builtin_sub_overflow(10, 20, &res)) {
+     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+     return;
+   }
+
+   if (res != -10) {
+     *(volatile char *)NULL; //no warning
+   }
+}
+
+void test5(void)
+{
+   int res;
+
+   if (__builtin_mul_overflow(10, 20, &res)) {
+     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+     return;
+   }
+
+   clang_analyzer_dump_int(res); //expected-warning{{200}}
+}
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index de70e483add1c0..73b56e7a8ba4db 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -278,6 +278,23 @@ int *mallocRegion(void) {
   return mem;
 }
 
+int *custom_calloc(size_t a, size_t b) {
+  size_t res;
+  if (__builtin_mul_overflow(a, b, &res))
+    return 0;
+
+  return malloc(res);
+}
+
+int *mallocRegionOverflow(void) {
+  int *mem = (int*)custom_calloc(4, 10);
+
+  mem[20] = 10;
+  // expected-warning at -1 {{Out of bound access to memory after the end of the heap area}}
+  // expected-note at -2 {{Access of the heap area at index 20, while it holds only 10 'int' elements}}
+  return mem;
+}
+
 int *mallocRegionDeref(void) {
   int *mem = (int*)malloc(2*sizeof(int));
 

>From 1b5d73f2f6cdcfed55776b9ae8ebbdbc026f7554 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin at gmail.com>
Date: Sat, 10 Aug 2024 00:08:54 +0300
Subject: [PATCH 2/2] clang/csa: address review and try to use symbolic values
 to split state

---
 .../Checkers/BuiltinFunctionChecker.cpp       | 64 +++++++++++++----
 clang/test/Analysis/builtin_overflow.c        | 68 ++++++++++++-------
 .../test/Analysis/out-of-bounds-diagnostics.c |  6 +-
 3 files changed, 95 insertions(+), 43 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index 0c8b9fa3d947b0..d048c02dd60012 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -30,6 +30,14 @@ using namespace ento;
 
 namespace {
 
+QualType getSufficientTypeForOverflowOp(CheckerContext &C, const QualType &T) {
+  assert(T->isIntegerType());
+  ASTContext &ACtx = C.getASTContext();
+
+  unsigned BitWidth = ACtx.getIntWidth(T);
+  return ACtx.getIntTypeForBitwidth(BitWidth * 2, T->isSignedIntegerType());
+}
+
 QualType getOverflowBuiltinResultType(const CallEvent &Call) {
   assert(Call.getNumArgs() == 3);
 
@@ -73,15 +81,18 @@ QualType getOverflowBuiltinResultType(const CallEvent &Call, CheckerContext &C,
     return getOverflowBuiltinResultType(Call);
   default:
     assert(false && "Unknown overflow builtin");
+    return Ast.IntTy;
   }
 }
 
 class BuiltinFunctionChecker : public Checker<eval::Call> {
 public:
   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
-  void HandleOverflowBuiltin(const CallEvent &Call, CheckerContext &C,
+  void handleOverflowBuiltin(const CallEvent &Call, CheckerContext &C,
                              BinaryOperator::Opcode Op,
                              QualType ResultType) const;
+  std::pair<bool, bool> checkOverflow(CheckerContext &C, SVal RetVal,
+                                      QualType Res) const;
 
 private:
   // From: clang/include/clang/Basic/Builtins.def
@@ -101,7 +112,36 @@ class BuiltinFunctionChecker : public Checker<eval::Call> {
 
 } // namespace
 
-void BuiltinFunctionChecker::HandleOverflowBuiltin(const CallEvent &Call,
+std::pair<bool, bool>
+BuiltinFunctionChecker::checkOverflow(CheckerContext &C, SVal RetVal,
+                                      QualType Res) const {
+  ProgramStateRef State = C.getState();
+  SValBuilder &SVB = C.getSValBuilder();
+  auto SvalToBool = [&](SVal val) {
+    return State->isNonNull(val).isConstrainedTrue();
+  };
+  ASTContext &ACtx = C.getASTContext();
+
+  assert(Res->isIntegerType());
+
+  unsigned BitWidth = ACtx.getIntWidth(Res);
+  SVal MinType = nonloc::ConcreteInt(
+      llvm::APSInt::getMinValue(BitWidth, Res->isUnsignedIntegerType()));
+  SVal MaxType = nonloc::ConcreteInt(
+      llvm::APSInt::getMaxValue(BitWidth, Res->isUnsignedIntegerType()));
+
+  bool IsGreaterMax =
+      SvalToBool(SVB.evalBinOp(State, BO_GT, RetVal, MaxType, Res));
+  bool IsLessMin =
+      SvalToBool(SVB.evalBinOp(State, BO_LT, RetVal, MinType, Res));
+
+  bool IsLeMax = SvalToBool(SVB.evalBinOp(State, BO_LE, RetVal, MaxType, Res));
+  bool IsGeMin = SvalToBool(SVB.evalBinOp(State, BO_GE, RetVal, MinType, Res));
+
+  return {IsGreaterMax || IsLessMin, IsLeMax && IsGeMin};
+}
+
+void BuiltinFunctionChecker::handleOverflowBuiltin(const CallEvent &Call,
                                                    CheckerContext &C,
                                                    BinaryOperator::Opcode Op,
                                                    QualType ResultType) const {
@@ -115,14 +155,12 @@ void BuiltinFunctionChecker::HandleOverflowBuiltin(const CallEvent &Call,
   SVal Arg1 = Call.getArgSVal(0);
   SVal Arg2 = Call.getArgSVal(1);
 
+  SVal RetValMax = SVB.evalBinOp(State, Op, Arg1, Arg2,
+                                 getSufficientTypeForOverflowOp(C, ResultType));
   SVal RetVal = SVB.evalBinOp(State, Op, Arg1, Arg2, ResultType);
 
-  // TODO: Handle overflows with values that known to overflow. Like INT_MAX + 1
-  // should not produce state for non-overflow case and threat it as
-  // unreachable.
-
-  // Handle non-overflow case.
-  {
+  auto [Overflow, NotOverflow] = checkOverflow(C, RetValMax, ResultType);
+  if (NotOverflow) {
     ProgramStateRef StateSuccess =
         State->BindExpr(CE, C.getLocationContext(), SVB.makeTruthVal(false));
 
@@ -132,11 +170,9 @@ void BuiltinFunctionChecker::HandleOverflowBuiltin(const CallEvent &Call,
     C.addTransition(StateSuccess);
   }
 
-  // Handle overflow case.
-  {
+  if (Overflow)
     C.addTransition(
         State->BindExpr(CE, C.getLocationContext(), SVB.makeTruthVal(true)));
-  }
 }
 
 bool BuiltinFunctionChecker::isBuiltinLikeFunction(
@@ -183,7 +219,7 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
   case Builtin::BI__builtin_umul_overflow:
   case Builtin::BI__builtin_umull_overflow:
   case Builtin::BI__builtin_umulll_overflow:
-    HandleOverflowBuiltin(Call, C, BO_Mul,
+    handleOverflowBuiltin(Call, C, BO_Mul,
                           getOverflowBuiltinResultType(Call, C, BI));
     return true;
   case Builtin::BI__builtin_sub_overflow:
@@ -193,7 +229,7 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
   case Builtin::BI__builtin_usub_overflow:
   case Builtin::BI__builtin_usubl_overflow:
   case Builtin::BI__builtin_usubll_overflow:
-    HandleOverflowBuiltin(Call, C, BO_Sub,
+    handleOverflowBuiltin(Call, C, BO_Sub,
                           getOverflowBuiltinResultType(Call, C, BI));
     return true;
   case Builtin::BI__builtin_add_overflow:
@@ -203,7 +239,7 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
   case Builtin::BI__builtin_uadd_overflow:
   case Builtin::BI__builtin_uaddl_overflow:
   case Builtin::BI__builtin_uaddll_overflow:
-    HandleOverflowBuiltin(Call, C, BO_Add,
+    handleOverflowBuiltin(Call, C, BO_Add,
                           getOverflowBuiltinResultType(Call, C, BI));
     return true;
   case Builtin::BI__builtin_assume:
diff --git a/clang/test/Analysis/builtin_overflow.c b/clang/test/Analysis/builtin_overflow.c
index 3b8639aa450046..695c8e66e8b842 100644
--- a/clang/test/Analysis/builtin_overflow.c
+++ b/clang/test/Analysis/builtin_overflow.c
@@ -1,65 +1,83 @@
 // RUN: %clang_analyze_cc1 -triple x86_64-unknown-unknown -verify %s \
 // RUN:   -analyzer-checker=core,debug.ExprInspection
 
-#define NULL ((void *)0)
-#define INT_MAX __INT_MAX__
+#define __UINT_MAX__ (__INT_MAX__  * 2U + 1U)
 
 void clang_analyzer_dump_int(int);
+void clang_analyzer_eval(int);
+void clang_analyzer_warnIfReached(void);
 
-void test1(void)
+void test_add_nooverflow(void)
 {
    int res;
 
    if (__builtin_add_overflow(10, 20, &res)) {
-     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+     clang_analyzer_warnIfReached();
      return;
    }
 
-   clang_analyzer_dump_int(res); //expected-warning{{30}}
+   clang_analyzer_dump_int(res); //expected-warning{{30 S32b}}
 }
 
-void test2(void)
+void test_add_overflow(void)
 {
    int res;
 
-   __builtin_add_overflow(10, 20, &res);
-   clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}} expected-warning{{S32b}}
+   if (__builtin_add_overflow(__INT_MAX__, 1, &res)) {
+     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+     return;
+   }
+
+   clang_analyzer_warnIfReached();
 }
 
-void test3(void)
+void test_add_overflow_contraints(int a, int b)
 {
    int res;
 
-   if (__builtin_sub_overflow(10, 20, &res)) {
-     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+   if (a != 10)
+     return;
+   if (b != 0)
+     return;
+
+   if (__builtin_add_overflow(a, b, &res)) {
+     clang_analyzer_warnIfReached();
      return;
    }
 
-   clang_analyzer_dump_int(res); //expected-warning{{-10}}
+   clang_analyzer_dump_int(res); //expected-warning{{10 S32b}}
 }
 
-void test4(void)
+void test_uaddll_overflow_contraints(unsigned long a, unsigned long b)
 {
-   int res;
+   unsigned long long res;
 
-   if (__builtin_sub_overflow(10, 20, &res)) {
-     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+   if (a != 10)
+     return;
+   if (b != 10)
      return;
-   }
 
-   if (res != -10) {
-     *(volatile char *)NULL; //no warning
+   if (__builtin_uaddll_overflow(a, b, &res)) {
+     clang_analyzer_warnIfReached();
+     return;
    }
 }
 
-void test5(void)
+void test_uadd_overflow_contraints(unsigned a, unsigned b)
 {
-   int res;
+   unsigned res;
 
-   if (__builtin_mul_overflow(10, 20, &res)) {
-     clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}}
+   if (a > 10)
      return;
-   }
+   if (b > 10)
+     return;
+
+   // clang_analyzer_eval(a + b < 30); <--- Prints 1 and 0, but why ???
 
-   clang_analyzer_dump_int(res); //expected-warning{{200}}
+   if (__builtin_uadd_overflow(a, b, &res)) {
+     /* clang_analyzer_warnIfReached(); */
+     return;
+   }
 }
+
+// TODO: more tests after figuring out what's going on.
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 73b56e7a8ba4db..3231fbc1a1be00 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -280,14 +280,12 @@ int *mallocRegion(void) {
 
 int *custom_calloc(size_t a, size_t b) {
   size_t res;
-  if (__builtin_mul_overflow(a, b, &res))
-    return 0;
 
-  return malloc(res);
+  return __builtin_mul_overflow(a, b, &res) ? 0 : malloc(res);
 }
 
 int *mallocRegionOverflow(void) {
-  int *mem = (int*)custom_calloc(4, 10);
+  int *mem = (int*)custom_calloc(10, sizeof(int));
 
   mem[20] = 10;
   // expected-warning at -1 {{Out of bound access to memory after the end of the heap area}}



More information about the cfe-commits mailing list