[clang] [WIP][Wunsafe-buffer-usage] False positives for & expression indexing constant size array (arr[anything & 0]) (PR #112284)

Malavika Samak via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 25 16:39:07 PDT 2024


https://github.com/malavikasamak updated https://github.com/llvm/llvm-project/pull/112284

>From 80e593f62c9f00e6d639b870ec4912de2b971864 Mon Sep 17 00:00:00 2001
From: MalavikaSamak <malavika2 at apple.com>
Date: Fri, 11 Oct 2024 12:24:58 -0700
Subject: [PATCH] [Wunsafe-buffer-usage] False positives for & expression
 indexing constant size array (arr[anything & 0])

Do not warn when a constant sized array is indexed with an expression that contains bitwise and operation
involving constants and it always results in a bound safe access.

(rdar://136684050)
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 142 +++++++++++++++++-
 .../warn-unsafe-buffer-usage-array.cpp        |  43 ++++++
 2 files changed, 177 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 97f1c4f16b8f4c..2477ab01d9eaa8 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -420,6 +420,122 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
   return false;
 }
 
+class MaxValueEval : public RecursiveASTVisitor<MaxValueEval> {
+
+  std::vector<llvm::APInt> val;
+  ASTContext &Context;
+  llvm::APInt Max;
+  unsigned bit_width;
+
+  public:
+  
+  typedef RecursiveASTVisitor<MaxValueEval> VisitorBase;
+ 
+  explicit MaxValueEval(ASTContext &Ctx, const Expr *exp): Context(Ctx) {
+    bit_width = Ctx.getIntWidth(exp->getType());
+    Max = llvm::APInt::getSignedMaxValue(bit_width);
+    //Max.setAllBits();
+    val.clear();
+  }
+
+  bool findMatch(Expr *exp) {
+    TraverseStmt(exp);
+    return true;
+  }
+
+  bool VisitDeclRefExpr(DeclRefExpr *dre) {
+    val.push_back(Max);
+    return false; 
+  }
+
+  bool VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
+    val.push_back(Max);
+    return false;
+  }
+
+  bool EvaluateExpression(Expr *exp) {
+    Expr::EvalResult EVResult;
+    if (exp->EvaluateAsInt(EVResult, Context)) {
+      llvm::APSInt Result = EVResult.Val.getInt();
+      val.push_back(Result);
+      return true;
+    }
+    return false;
+  }
+  
+  bool VisitBinaryOperator(BinaryOperator *E) {
+   
+   if(EvaluateExpression(E)) {
+      return false;
+   } else {
+     TraverseStmt(E->getLHS());
+     llvm::APInt LHS = val.back();
+     val.pop_back();
+
+     TraverseStmt(E->getRHS());
+     llvm::APInt RHS = val.back();
+     val.pop_back();
+     llvm::APInt Result = Max;
+ 
+      switch (E->getOpcode()) {
+        case BO_And:
+        case BO_AndAssign:
+          Result = LHS & RHS;
+          break;
+
+        case BO_Or:
+        case BO_OrAssign:
+          Result = LHS | RHS;
+          break;
+
+        case BO_Shl:
+        case BO_ShlAssign:
+          if(RHS != Max.getLimitedValue())
+            Result = LHS << RHS.getLimitedValue();
+          break;
+
+        case BO_Shr:
+        case BO_ShrAssign:
+          if(RHS == Max.getLimitedValue())
+            Result = LHS;
+          //else if(RHS.getLimitedValue() >= bit_width)
+          //  Result = llvm::APInt::getZero(bit_width);
+          else 
+            Result = LHS.getLimitedValue() >> RHS.getLimitedValue();
+          break;
+
+        case BO_Rem:
+        case BO_RemAssign:
+          if(LHS.getLimitedValue() < RHS.getLimitedValue())
+            Result = LHS;
+          else
+            Result = --RHS;
+          break;
+
+        default:
+          break;
+      } 
+      val.push_back(Result);
+      return false;
+    }
+    return true;
+  }
+ 
+  bool VisitExpr(Expr *E) {
+    if(EvaluateExpression(E)) {
+      return false;
+    } 
+    return VisitorBase::VisitExpr(E);
+  }
+  
+  APInt getValue() {
+    if(val.size() == 1)
+      return val[0];
+    else // A pattern we didn't consider was encountered
+      return Max;
+  }
+};
+
 AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   // FIXME: Proper solution:
   //  - refactor Sema::CheckArrayAccess
@@ -439,14 +555,13 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   if (!CATy)
     return false;
 
-  if (const auto *IdxLit = dyn_cast<IntegerLiteral>(Node.getIdx())) {
-    const APInt ArrIdx = IdxLit->getValue();
-    // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a
-    // bug
-    if (ArrIdx.isNonNegative() &&
-        ArrIdx.getLimitedValue() < CATy->getLimitedSize())
-      return true;
-  }
+  MaxValueEval Vis(Finder->getASTContext(), Node.getIdx());
+  Vis.findMatch(const_cast<Expr*>(Node.getIdx()));
+  APInt result = Vis.getValue();
+ 
+  if (result.isNonNegative() &&
+      result.getLimitedValue() < CATy->getLimitedSize())
+    return true;
 
   return false;
 }
@@ -1146,6 +1261,10 @@ class ArraySubscriptGadget : public WarningGadget {
     // clang-format on
   }
 
+  const ArraySubscriptExpr*  getASE() const{
+    return ASE;
+  }
+
   void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
                              bool IsRelatedToDecl,
                              ASTContext &Ctx) const override {
@@ -3904,6 +4023,13 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
                                           : FixItList{},
                                       D, NaiveStrategy);
     for (const auto &G : WarningGadgets) {
+      //const Stmt *Operation = Handler;
+      if(const auto ASG = dyn_cast<ArraySubscriptGadget>(G)) {
+       const auto * AS = ASG->getASE();
+       MaxValueEval Vis(VD->getASTContext(), AS->getIdx());
+       Vis.findMatch(const_cast<Expr*>(AS->getIdx()));
+       APInt result = Vis.getValue();
+      }
       G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/true,
                                D->getASTContext());
     }
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
index 8b2f103ec66708..6f0a974e45d9c6 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
@@ -33,6 +33,49 @@ void constant_idx_safe0(unsigned idx) {
   buffer[0] = 0;
 }
 
+int array[10]; // expected-warning 3{{'array' is an unsafe buffer that does not perform bounds checks}}
+
+void masked_idx1(unsigned long long idx) {
+  // Bitwise and operation
+  array[idx & 5] = 10; // no warning
+  array[idx & 11 & 5] = 3; // no warning
+  array[idx & 11] = 20; // expected-note{{used in buffer access here}}
+  array[(idx & 9) | 8];
+  array[idx &=5];
+}
+
+void masked_idx2(unsigned long long idx, unsigned long long idx2) {
+  array[idx] = 30; // expected-note{{used in buffer access here}}
+  
+  // Remainder operation
+  array[idx % 10];
+  array[10 % idx]; // expected-note{{used in buffer access here}}
+  array[9 % idx];
+  array[idx % idx2]; // expected-note{{used in buffer access here}}
+  array[idx %= 10];
+}
+
+void masked_idx3(unsigned long long idx) {
+  // Left shift operation <<
+  array[2 << 1];
+  array[8 << 1]; // expected-note{{used in buffer access here}}
+  array[2 << idx]; // expected-note{{used in buffer access here}}
+  array[idx << 2]; // expected-note{{used in buffer access here}}
+  
+  // Right shift operation >>
+  array[16 >> 1];
+  array[8 >> idx];  
+  array[idx >> 63];
+}
+
+int array2[5];
+
+void masked_idx_false(unsigned long long idx) {
+  array2[6 & 5]; // no warning
+  array2[6 & idx & (idx + 1) & 5]; // no warning
+  array2[6 & idx & 5 & (idx + 1) | 4]; // no warning 
+}
+
 void constant_idx_unsafe(unsigned idx) {
   int buffer[10];       // expected-warning{{'buffer' is an unsafe buffer that does not perform bounds checks}}
                         // expected-note at -1{{change type of 'buffer' to 'std::array' to label it for hardening}}



More information about the cfe-commits mailing list