[clang] 200007e - [-Wunsafe-buffer-usage] Initial commit - Transition away from raw buffers.

Artem Dergachev via cfe-commits cfe-commits at lists.llvm.org
Mon Dec 5 15:13:48 PST 2022


Author: Artem Dergachev
Date: 2022-12-05T15:13:42-08:00
New Revision: 200007ec85f81122fd260a4e68308e54607ca37a

URL: https://github.com/llvm/llvm-project/commit/200007ec85f81122fd260a4e68308e54607ca37a
DIFF: https://github.com/llvm/llvm-project/commit/200007ec85f81122fd260a4e68308e54607ca37a.diff

LOG: [-Wunsafe-buffer-usage] Initial commit - Transition away from raw buffers.

This is the initial commit for -Wunsafe-buffer-usage, a warning that helps
codebases (especially modern C++ codebases) transition away from raw buffer
pointers.

The warning is implemented in libAnalysis as it's going to become a non-trivial
analysis, mostly the fixit part where we try to figure out if we understand
a variable's use pattern well enough to suggest a safe container/view
as a replacement. Some parts of this analsysis may eventually prove useful
for any similar fixit machine that tries to change types of variables.

The warning is disabled by default.

RFC/discussion in https://discourse.llvm.org/t/rfc-c-buffer-hardening/65734

Differential Revision: https://reviews.llvm.org/D137346

Added: 
    clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
    clang/lib/Analysis/UnsafeBufferUsage.cpp
    clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp

Modified: 
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/Analysis/CMakeLists.txt
    clang/lib/Sema/AnalysisBasedWarnings.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
new file mode 100644
index 0000000000000..1ddbb92eb66f7
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -0,0 +1,38 @@
+//===- UnsafeBufferUsage.h - Replace pointers with modern C++ ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines an analysis that aids replacing buffer accesses through
+//  raw pointers with safer C++ abstractions such as containers and views/spans.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+namespace clang {
+
+/// The interface that lets the caller handle unsafe buffer usage analysis
+/// results by overriding this class's handle... methods.
+class UnsafeBufferUsageHandler {
+public:
+  UnsafeBufferUsageHandler() = default;
+  virtual ~UnsafeBufferUsageHandler() = default;
+
+  /// Invoked when an unsafe operation over raw pointers is found.
+  virtual void handleUnsafeOperation(const Stmt *Operation) = 0;
+};
+
+// This function invokes the analysis and allows the caller to react to it
+// through the handler class.
+void checkUnsafeBufferUsage(const Decl *D, UnsafeBufferUsageHandler &Handler);
+
+} // end namespace clang
+
+#endif /* LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H */

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 15f85b778ccc6..6a831abebf694 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11732,4 +11732,9 @@ def err_cast_from_randomized_struct : Error<
 // LoongArch-specific Diagnostics
 def err_loongarch_builtin_requires_la64 : Error<
   "this builtin requires target: loongarch64">;
+
+// Unsafe buffer usage diagnostics.
+def warn_unsafe_buffer_usage : Warning<
+  "unchecked operation on raw buffer in expression">,
+  InGroup<DiagGroup<"unsafe-buffer-usage">>, DefaultIgnore;
 } // end of sema component.

diff  --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt
index 16e3f474060cb..ea6cb59e8e199 100644
--- a/clang/lib/Analysis/CMakeLists.txt
+++ b/clang/lib/Analysis/CMakeLists.txt
@@ -32,6 +32,7 @@ add_clang_library(clangAnalysis
   ThreadSafetyLogical.cpp
   ThreadSafetyTIL.cpp
   UninitializedValues.cpp
+  UnsafeBufferUsage.cpp
 
   LINK_LIBS
   clangAST

diff  --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
new file mode 100644
index 0000000000000..0f8fb68942116
--- /dev/null
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -0,0 +1,79 @@
+//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace ast_matchers;
+
+namespace {
+// TODO: Better abstractions over gadgets.
+using GadgetList = std::vector<const Stmt *>;
+}
+
+// Scan the function and return a list of gadgets found with provided kits.
+static GadgetList findGadgets(const Decl *D) {
+
+  class GadgetFinderCallback : public MatchFinder::MatchCallback {
+    GadgetList &Output;
+
+  public:
+    GadgetFinderCallback(GadgetList &Output) : Output(Output) {}
+
+    void run(const MatchFinder::MatchResult &Result) override {
+      Output.push_back(Result.Nodes.getNodeAs<Stmt>("root_node"));
+    }
+  };
+
+  GadgetList G;
+  MatchFinder M;
+
+  auto IncrementMatcher = unaryOperator(
+    hasOperatorName("++"),
+    hasUnaryOperand(hasType(pointerType()))
+  );
+  auto DecrementMatcher = unaryOperator(
+    hasOperatorName("--"),
+    hasUnaryOperand(hasType(pointerType()))
+  );
+
+  GadgetFinderCallback CB(G);
+
+  M.addMatcher(
+      stmt(forEachDescendant(
+        stmt(
+          anyOf(
+            IncrementMatcher,
+            DecrementMatcher
+            /* Fill me in! */
+          )
+          // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
+          // here, to make sure that the statement actually belongs to the
+          // function and not to a nested function. However, forCallable uses
+          // ParentMap which can't be used before the AST is fully constructed.
+          // The original problem doesn't sound like it needs ParentMap though,
+          // maybe there's a more direct solution?
+        ).bind("root_node")
+      )), &CB);
+
+  M.match(*D->getBody(), D->getASTContext());
+
+  return G; // NRVO!
+}
+
+void clang::checkUnsafeBufferUsage(const Decl *D,
+                                   UnsafeBufferUsageHandler &Handler) {
+  assert(D && D->getBody());
+
+  GadgetList G = findGadgets(D);
+  for (const Stmt *S : G) {
+    Handler.handleUnsafeOperation(S);
+  }
+}

diff  --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 9780a0aba7498..70b81c00904af 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -29,6 +29,7 @@
 #include "clang/Analysis/Analyses/ReachableCode.h"
 #include "clang/Analysis/Analyses/ThreadSafety.h"
 #include "clang/Analysis/Analyses/UninitializedValues.h"
+#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
 #include "clang/Analysis/CFG.h"
 #include "clang/Analysis/CFGStmtMap.h"
@@ -2138,6 +2139,23 @@ class ConsumedWarningsHandler : public ConsumedWarningsHandlerBase {
 } // namespace consumed
 } // namespace clang
 
+//===----------------------------------------------------------------------===//
+// Unsafe buffer usage analysis.
+//===----------------------------------------------------------------------===//
+
+class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
+  Sema &S;
+
+public:
+  UnsafeBufferUsageReporter(Sema &S) : S(S) {}
+
+  void handleUnsafeOperation(const Stmt *Operation) override {
+    S.Diag(Operation->getBeginLoc(), diag::warn_unsafe_buffer_usage)
+        << Operation->getSourceRange();
+  }
+};
+
+
 //===----------------------------------------------------------------------===//
 // AnalysisBasedWarnings - Worker object used by Sema to execute analysis-based
 //  warnings on a function, method, or block.
@@ -2430,6 +2448,12 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
       if (S.getLangOpts().CPlusPlus && isNoexcept(FD))
         checkThrowInNonThrowingFunc(S, FD, AC);
 
+  // Emit unsafe buffer usage warnings and fixits.
+  if (!Diags.isIgnored(diag::warn_unsafe_buffer_usage, D->getBeginLoc())) {
+    UnsafeBufferUsageReporter R(S);
+    checkUnsafeBufferUsage(D, R);
+  }
+
   // If none of the previous checks caused a CFG build, trigger one here
   // for the logical error handler.
   if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) {

diff  --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
new file mode 100644
index 0000000000000..59f830d29e3b9
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -Wunsafe-buffer-usage -verify %s
+
+void testIncrement(char *p) {
+  ++p; // expected-warning{{unchecked operation on raw buffer in expression}}
+  p++; // expected-warning{{unchecked operation on raw buffer in expression}}
+  --p; // expected-warning{{unchecked operation on raw buffer in expression}}
+  p--; // expected-warning{{unchecked operation on raw buffer in expression}}
+}


        


More information about the cfe-commits mailing list