[clang] 97dd8e3 - [analyzer] Clean up apiModeling.llvm.ReturnValue (#91231)

via cfe-commits cfe-commits at lists.llvm.org
Tue May 7 04:06:15 PDT 2024


Author: DonĂ¡t Nagy
Date: 2024-05-07T13:06:11+02:00
New Revision: 97dd8e3c4f38ef345b01fbbf0a2052c7875ff7e0

URL: https://github.com/llvm/llvm-project/commit/97dd8e3c4f38ef345b01fbbf0a2052c7875ff7e0
DIFF: https://github.com/llvm/llvm-project/commit/97dd8e3c4f38ef345b01fbbf0a2052c7875ff7e0.diff

LOG: [analyzer] Clean up apiModeling.llvm.ReturnValue (#91231)

This commit heavily refactors and simplifies the small and trivial
checker `apiModeling.llvm.ReturnValue`, which is responsible for
modeling the peculiar coding convention that in the LLVM/Clang codebase
certain Error() methods always return true.

Changes included in this commit:
- The call description mode is now specified explicitly (this is not the
most significant change, but it was the original reason for touching
this checker).
- Previously the code provided support for modeling functions that
always return `false`; but there was no need for that, so this commit
hardcodes that the return value is `true`.
- The overcomplicated constraint/state handling logic was simplified.
- The separate `checkEndFunction` callback was removed to simplify the
code. Admittedly this means that the note tag for the "<method> returns
false, breaking the convention" case is placed on the method call
instead of the `return` statement; but that case will _never_ appear in
practice, so this difference is mostly academical.
- The text of the note tags was clarified.
- The descriptions in the header comment and Checkers.td were clarified.
- Some minor cleanup was applied in the associated test file.

This change is very close to NFC because it only affects a hidden
`apiModeling.llvm` checker that's only relevant during the analysis of
the LLVM/Clang codebase, and even there it doesn't affect the normal
behavior of the checker.

Added: 
    

Modified: 
    clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
    clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
    clang/test/Analysis/return-value-guaranteed.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 520286b57c9fd..64414e3d37f75 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1397,7 +1397,7 @@ def CastValueChecker : Checker<"CastValue">,
   Documentation<NotDocumented>;
 
 def ReturnValueChecker : Checker<"ReturnValue">,
-  HelpText<"Model the guaranteed boolean return value of function calls">,
+  HelpText<"Model certain Error() methods that always return true by convention">,
   Documentation<NotDocumented>;
 
 } // end "apiModeling.llvm"

diff  --git a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
index c3112ebe4e794..3da571adfa44c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
@@ -1,4 +1,4 @@
-//===- ReturnValueChecker - Applies guaranteed return values ----*- C++ -*-===//
+//===- ReturnValueChecker - Check methods always returning true -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This defines ReturnValueChecker, which checks for calls with guaranteed
-// boolean return value. It ensures the return value of each function call.
+// This defines ReturnValueChecker, which models a very specific coding
+// convention within the LLVM/Clang codebase: there several classes that have
+// Error() methods which always return true.
+// This checker was introduced to eliminate false positives caused by this
+// peculiar "always returns true" invariant. (Normally, the analyzer assumes
+// that a function returning `bool` can return both `true` and `false`, because
+// otherwise it could've been a `void` function.)
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,43 +23,40 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/FormatVariadic.h"
 #include <optional>
 
 using namespace clang;
 using namespace ento;
+using llvm::formatv;
 
 namespace {
-class ReturnValueChecker : public Checker<check::PostCall, check::EndFunction> {
+class ReturnValueChecker : public Checker<check::PostCall> {
 public:
-  // It sets the predefined invariant ('CDM') if the current call not break it.
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
 
-  // It reports whether a predefined invariant ('CDM') is broken.
-  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
-
 private:
-  // The pairs are in the following form: {{{class, call}}, return value}
-  const CallDescriptionMap<bool> CDM = {
+  const CallDescriptionSet Methods = {
       // These are known in the LLVM project: 'Error()'
-      {{{"ARMAsmParser", "Error"}}, true},
-      {{{"HexagonAsmParser", "Error"}}, true},
-      {{{"LLLexer", "Error"}}, true},
-      {{{"LLParser", "Error"}}, true},
-      {{{"MCAsmParser", "Error"}}, true},
-      {{{"MCAsmParserExtension", "Error"}}, true},
-      {{{"TGParser", "Error"}}, true},
-      {{{"X86AsmParser", "Error"}}, true},
+      {CDM::CXXMethod, {"ARMAsmParser", "Error"}},
+      {CDM::CXXMethod, {"HexagonAsmParser", "Error"}},
+      {CDM::CXXMethod, {"LLLexer", "Error"}},
+      {CDM::CXXMethod, {"LLParser", "Error"}},
+      {CDM::CXXMethod, {"MCAsmParser", "Error"}},
+      {CDM::CXXMethod, {"MCAsmParserExtension", "Error"}},
+      {CDM::CXXMethod, {"TGParser", "Error"}},
+      {CDM::CXXMethod, {"X86AsmParser", "Error"}},
       // 'TokError()'
-      {{{"LLParser", "TokError"}}, true},
-      {{{"MCAsmParser", "TokError"}}, true},
-      {{{"MCAsmParserExtension", "TokError"}}, true},
-      {{{"TGParser", "TokError"}}, true},
+      {CDM::CXXMethod, {"LLParser", "TokError"}},
+      {CDM::CXXMethod, {"MCAsmParser", "TokError"}},
+      {CDM::CXXMethod, {"MCAsmParserExtension", "TokError"}},
+      {CDM::CXXMethod, {"TGParser", "TokError"}},
       // 'error()'
-      {{{"MIParser", "error"}}, true},
-      {{{"WasmAsmParser", "error"}}, true},
-      {{{"WebAssemblyAsmParser", "error"}}, true},
+      {CDM::CXXMethod, {"MIParser", "error"}},
+      {CDM::CXXMethod, {"WasmAsmParser", "error"}},
+      {CDM::CXXMethod, {"WebAssemblyAsmParser", "error"}},
       // Other
-      {{{"AsmParser", "printError"}}, true}};
+      {CDM::CXXMethod, {"AsmParser", "printError"}}};
 };
 } // namespace
 
@@ -68,100 +70,32 @@ static std::string getName(const CallEvent &Call) {
   return Name;
 }
 
-// The predefinitions ('CDM') could break due to the ever growing code base.
-// Check for the expected invariants and see whether they apply.
-static std::optional<bool> isInvariantBreak(bool ExpectedValue, SVal ReturnV,
-                                            CheckerContext &C) {
-  auto ReturnDV = ReturnV.getAs<DefinedOrUnknownSVal>();
-  if (!ReturnDV)
-    return std::nullopt;
-
-  if (ExpectedValue)
-    return C.getState()->isNull(*ReturnDV).isConstrainedTrue();
-
-  return C.getState()->isNull(*ReturnDV).isConstrainedFalse();
-}
-
 void ReturnValueChecker::checkPostCall(const CallEvent &Call,
                                        CheckerContext &C) const {
-  const bool *RawExpectedValue = CDM.lookup(Call);
-  if (!RawExpectedValue)
+  if (!Methods.contains(Call))
     return;
 
-  SVal ReturnV = Call.getReturnValue();
-  bool ExpectedValue = *RawExpectedValue;
-  std::optional<bool> IsInvariantBreak =
-      isInvariantBreak(ExpectedValue, ReturnV, C);
-  if (!IsInvariantBreak)
-    return;
+  auto ReturnV = Call.getReturnValue().getAs<DefinedOrUnknownSVal>();
 
-  // If the invariant is broken it is reported by 'checkEndFunction()'.
-  if (*IsInvariantBreak)
+  if (!ReturnV)
     return;
 
-  std::string Name = getName(Call);
-  const NoteTag *CallTag = C.getNoteTag(
-      [Name, ExpectedValue](PathSensitiveBugReport &) -> std::string {
-        SmallString<128> Msg;
-        llvm::raw_svector_ostream Out(Msg);
-
-        Out << '\'' << Name << "' returns "
-            << (ExpectedValue ? "true" : "false");
-        return std::string(Out.str());
-      },
-      /*IsPrunable=*/true);
-
   ProgramStateRef State = C.getState();
-  State = State->assume(ReturnV.castAs<DefinedOrUnknownSVal>(), ExpectedValue);
-  C.addTransition(State, CallTag);
-}
-
-void ReturnValueChecker::checkEndFunction(const ReturnStmt *RS,
-                                          CheckerContext &C) const {
-  if (!RS || !RS->getRetValue())
+  if (ProgramStateRef StTrue = State->assume(*ReturnV, true)) {
+    // The return value can be true, so transition to a state where it's true.
+    std::string Msg =
+        formatv("'{0}' returns true (by convention)", getName(Call));
+    C.addTransition(StTrue, C.getNoteTag(Msg, /*IsPrunable=*/true));
     return;
-
-  // We cannot get the caller in the top-frame.
-  const StackFrameContext *SFC = C.getStackFrame();
-  if (C.getStackFrame()->inTopFrame())
-    return;
-
-  ProgramStateRef State = C.getState();
-  CallEventManager &CMgr = C.getStateManager().getCallEventManager();
-  CallEventRef<> Call = CMgr.getCaller(SFC, State);
-  if (!Call)
-    return;
-
-  const bool *RawExpectedValue = CDM.lookup(*Call);
-  if (!RawExpectedValue)
-    return;
-
-  SVal ReturnV = State->getSVal(RS->getRetValue(), C.getLocationContext());
-  bool ExpectedValue = *RawExpectedValue;
-  std::optional<bool> IsInvariantBreak =
-      isInvariantBreak(ExpectedValue, ReturnV, C);
-  if (!IsInvariantBreak)
-    return;
-
-  // If the invariant is appropriate it is reported by 'checkPostCall()'.
-  if (!*IsInvariantBreak)
-    return;
-
-  std::string Name = getName(*Call);
-  const NoteTag *CallTag = C.getNoteTag(
-      [Name, ExpectedValue](BugReport &BR) -> std::string {
-        SmallString<128> Msg;
-        llvm::raw_svector_ostream Out(Msg);
-
-        // The following is swapped because the invariant is broken.
-        Out << '\'' << Name << "' returns "
-            << (ExpectedValue ? "false" : "true");
-
-        return std::string(Out.str());
-      },
-      /*IsPrunable=*/false);
-
-  C.addTransition(State, CallTag);
+  }
+  // Paranoia: if the return value is known to be false (which is highly
+  // unlikely, it's easy to ensure that the method always returns true), then
+  // produce a note that highlights that this unusual situation.
+  // Note that this checker is 'hidden' so it cannot produce a bug report.
+  std::string Msg = formatv("'{0}' returned false, breaking the convention "
+                            "that it always returns true",
+                            getName(Call));
+  C.addTransition(State, C.getNoteTag(Msg, /*IsPrunable=*/true));
 }
 
 void ento::registerReturnValueChecker(CheckerManager &Mgr) {

diff  --git a/clang/test/Analysis/return-value-guaranteed.cpp b/clang/test/Analysis/return-value-guaranteed.cpp
index 367a8e5906afc..3b010ffba3600 100644
--- a/clang/test/Analysis/return-value-guaranteed.cpp
+++ b/clang/test/Analysis/return-value-guaranteed.cpp
@@ -1,91 +1,91 @@
 // RUN: %clang_analyze_cc1 \
 // RUN:  -analyzer-checker=core,apiModeling.llvm.ReturnValue \
-// RUN:  -analyzer-output=text -verify=class %s
+// RUN:  -analyzer-output=text -verify %s
 
 struct Foo { int Field; };
 bool problem();
 void doSomething();
 
-// We predefined the return value of 'MCAsmParser::Error' as true and we cannot
-// take the false-branches which leads to a "garbage value" false positive.
-namespace test_classes {
+// Test the normal case when the implementation of MCAsmParser::Error() (one of
+// the methods modeled by this checker) is opaque.
+namespace test_normal {
 struct MCAsmParser {
   static bool Error();
 };
 
 bool parseFoo(Foo &F) {
   if (problem()) {
-    // class-note at -1 {{Assuming the condition is false}}
-    // class-note at -2 {{Taking false branch}}
+    // expected-note at -1 {{Assuming the condition is false}}
+    // expected-note at -2 {{Taking false branch}}
     return MCAsmParser::Error();
   }
 
   F.Field = 0;
-  // class-note at -1 {{The value 0 is assigned to 'F.Field'}}
-  return !MCAsmParser::Error();
-  // class-note at -1 {{'MCAsmParser::Error' returns true}}
+  // expected-note at -1 {{The value 0 is assigned to 'F.Field'}}
+  return false;
 }
 
 bool parseFile() {
   Foo F;
   if (parseFoo(F)) {
-    // class-note at -1 {{Calling 'parseFoo'}}
-    // class-note at -2 {{Returning from 'parseFoo'}}
-    // class-note at -3 {{Taking false branch}}
+    // expected-note at -1 {{Calling 'parseFoo'}}
+    // expected-note at -2 {{Returning from 'parseFoo'}}
+    // expected-note at -3 {{Taking false branch}}
     return true;
   }
 
+  // The following expression would produce the false positive report
+  //    "The left operand of '==' is a garbage value"
+  // without the modeling done by apiModeling.llvm.ReturnValue:
   if (F.Field == 0) {
-    // class-note at -1 {{Field 'Field' is equal to 0}}
-    // class-note at -2 {{Taking true branch}}
-
-    // no-warning: "The left operand of '==' is a garbage value" was here.
+    // expected-note at -1 {{Field 'Field' is equal to 0}}
+    // expected-note at -2 {{Taking true branch}}
     doSomething();
   }
 
+  // Trigger a zero division to get path notes:
   (void)(1 / F.Field);
-  // class-warning at -1 {{Division by zero}}
-  // class-note at -2 {{Division by zero}}
+  // expected-warning at -1 {{Division by zero}}
+  // expected-note at -2 {{Division by zero}}
   return false;
 }
-} // namespace test_classes
+} // namespace test_normal
 
 
-// We predefined 'MCAsmParser::Error' as returning true, but now it returns
-// false, which breaks our invariant. Test the notes.
+// Sanity check for the highly unlikely case where the implementation of the
+// method breaks the convention.
 namespace test_break {
 struct MCAsmParser {
   static bool Error() {
-    return false; // class-note {{'MCAsmParser::Error' returns false}}
+    return false;
   }
 };
 
 bool parseFoo(Foo &F) {
   if (problem()) {
-    // class-note at -1 {{Assuming the condition is false}}
-    // class-note at -2 {{Taking false branch}}
+    // expected-note at -1 {{Assuming the condition is false}}
+    // expected-note at -2 {{Taking false branch}}
     return !MCAsmParser::Error();
   }
 
   F.Field = 0;
-  // class-note at -1 {{The value 0 is assigned to 'F.Field'}}
+  // expected-note at -1 {{The value 0 is assigned to 'F.Field'}}
   return MCAsmParser::Error();
-  // class-note at -1 {{Calling 'MCAsmParser::Error'}}
-  // class-note at -2 {{Returning from 'MCAsmParser::Error'}}
+  // expected-note at -1 {{'MCAsmParser::Error' returned false, breaking the convention that it always returns true}}
 }
 
 bool parseFile() {
   Foo F;
   if (parseFoo(F)) {
-    // class-note at -1 {{Calling 'parseFoo'}}
-    // class-note at -2 {{Returning from 'parseFoo'}}
-    // class-note at -3 {{Taking false branch}}
+    // expected-note at -1 {{Calling 'parseFoo'}}
+    // expected-note at -2 {{Returning from 'parseFoo'}}
+    // expected-note at -3 {{Taking false branch}}
     return true;
   }
 
   (void)(1 / F.Field);
-  // class-warning at -1 {{Division by zero}}
-  // class-note at -2 {{Division by zero}}
+  // expected-warning at -1 {{Division by zero}}
+  // expected-note at -2 {{Division by zero}}
   return false;
 }
-} // namespace test_classes
+} // namespace test_break


        


More information about the cfe-commits mailing list