[clang] [analyzer] Improve diagnostics from ArrayBoundCheckerV2 (PR #70056)

via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 2 05:46:01 PDT 2023


=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/70056 at github.com>


================
@@ -174,9 +176,116 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-                                        const Stmt* LoadS,
-                                        CheckerContext &checkerContext) const {
+static std::string getRegionName(const SubRegion *Region) {
+  if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
+    return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs<FieldRegion>()) {
+    if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
+      return formatv("the field '{0}'", Name);
+    return "the unnamed field";
+  }
+
+  if (isa<AllocaRegion>(Region))
+    return "the memory returned by 'alloca'";
+
+  if (isa<SymbolicRegion>(Region) &&
+      isa<HeapSpaceRegion>(Region->getMemorySpace()))
+    return "the heap area";
+
+  if (isa<StringRegion>(Region))
+    return "the string literal";
+
+  return "the region";
+}
+
+static std::optional<int64_t> getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
+    return ConcreteVal->getValue().tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  static const char *ShortMsgTemplates[] = {
+      "Out of bound access to memory preceding {0}",
+      "Out of bound access to memory after the end of {0}",
+      "Potential out of bound access to {0} with tainted offset"};
+
+  return formatv(ShortMsgTemplates[Kind], RegName);
+}
+
+static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
+  SmallString<128> Buf;
+  llvm::raw_svector_ostream Out(Buf);
----------------
DonatNagyE wrote:

This is a minor performance optimization, which was suggested by the LLVM Programmer's Manual:
> The major disadvantage of std::string is that almost every operation that makes them larger can allocate memory, which is slow. As such, it is better to use SmallVector or Twine as a scratch buffer, but then use std::string to persist the result.
(from https://www.llvm.org/docs/ProgrammersManual.html#std-string)

I'd slightly prefer keeping it, but I can replace it with `std::string` if you want.


https://github.com/llvm/llvm-project/pull/70056


More information about the cfe-commits mailing list