[clang] [BoundsSafety][Sema] Allow counted_by and counted_by_or_null on pointers where the pointee type is incomplete but potentially completable (PR #106321)

Dan Liew via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 28 12:09:32 PDT 2024


================
@@ -186,4 +218,370 @@ bool Sema::CheckCountedByAttrOnField(FieldDecl *FD, Expr *E, bool CountInBytes,
   return false;
 }
 
+SourceRange Sema::BoundsSafetySourceRangeFor(const CountAttributedType *CATy) {
+  // Note: This implementation relies on `CountAttributedType` being unique.
+  // E.g.:
+  //
+  // struct Foo {
+  //   int count;
+  //   char* __counted_by(count) buffer;
+  //   char* __counted_by(count) buffer2;
+  // };
+  //
+  // The types of `buffer` and `buffer2` are unique. The types being
+  // unique means the SourceLocation of the `counted_by` expression can be used
+  // to find where the attribute was written.
+
+  auto Fallback = CATy->getCountExpr()->getSourceRange();
+  auto CountExprBegin = CATy->getCountExpr()->getBeginLoc();
+
+  // FIXME: We currently don't support the count expression being a macro
+  // itself. E.g.:
+  //
+  // #define ZERO 0
+  // int* __counted_by(ZERO) x;
+  //
+  if (SourceMgr.isMacroBodyExpansion(CountExprBegin))
+    return Fallback;
+
+  auto FetchIdentifierTokenFromOffset =
+      [&](ssize_t Offset) -> std::optional<Token> {
+    SourceLocation OffsetLoc = CountExprBegin.getLocWithOffset(Offset);
+    Token Result;
+    if (Lexer::getRawToken(OffsetLoc, Result, SourceMgr, getLangOpts()))
+      return std::optional<Token>(); // Failed
+
+    if (!Result.isAnyIdentifier())
+      return std::optional<Token>(); // Failed
+
+    return Result; // Success
+  };
+
+  auto CountExprEnd = CATy->getCountExpr()->getEndLoc();
+  auto FindRParenTokenAfter = [&]() -> std::optional<Token> {
+    auto CountExprEndSpelling = SourceMgr.getSpellingLoc(CountExprEnd);
+    auto MaybeRParenTok = Lexer::findNextToken(
+        CountExprEndSpelling, getSourceManager(), getLangOpts());
+
+    if (!MaybeRParenTok.has_value())
+      return std::nullopt;
+
+    if (!MaybeRParenTok->is(tok::r_paren))
+      return std::nullopt;
+
+    return *MaybeRParenTok;
+  };
+
+  // Step back two characters to point at the last character of the attribute
+  // text.
+  //
+  // __counted_by(count)
+  //            ^ ^
+  //            | |
+  //            ---
+  auto MaybeLastAttrCharToken = FetchIdentifierTokenFromOffset(-2);
+  if (!MaybeLastAttrCharToken)
+    return Fallback;
+
+  auto LastAttrCharToken = MaybeLastAttrCharToken.value();
+
+  if (LastAttrCharToken.getLength() > 1) {
+    // Special case: When the character is part of a macro the Token we get
+    // is the whole macro name (e.g. `__counted_by`).
+    if (LastAttrCharToken.getRawIdentifier() !=
+        CATy->GetAttributeName(/*WithMacroPrefix=*/true))
+      return Fallback;
+
+    // Found the beginning of the `__counted_by` macro
+    SourceLocation Begin = LastAttrCharToken.getLocation();
+    // Now try to find the closing `)` of the macro.
+    auto MaybeRParenTok = FindRParenTokenAfter();
+    if (!MaybeRParenTok.has_value())
+      return Fallback;
+
+    return SourceRange(Begin, MaybeRParenTok->getLocation());
+  }
+
+  assert(LastAttrCharToken.getLength() == 1);
+  // The Token we got back is just the last character of the identifier.
+  // This means a macro is not being used and instead the attribute is being
+  // used directly. We need to find the beginning of the identifier. We support
+  // two cases:
+  //
+  // * Non-affixed version. E.g: `counted_by`
+  // * Affixed version. E.g.: `__counted_by__`
+
+  // Try non-affixed version. E.g.:
+  //
+  // __attribute__((counted_by(count)))
+  //                ^          ^
+  //                |          |
+  //                ------------
+
+  // +1 is for `(`
+  const ssize_t NonAffixedSkipCount =
+      CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1;
+  auto MaybeNonAffixedBeginToken =
+      FetchIdentifierTokenFromOffset(-NonAffixedSkipCount);
+  if (!MaybeNonAffixedBeginToken)
+    return Fallback;
+
+  auto NonAffixedBeginToken = MaybeNonAffixedBeginToken.value();
+  if (NonAffixedBeginToken.getRawIdentifier() ==
+      CATy->GetAttributeName(/*WithMacroPrefix=*/false)) {
+    // Found the beginning of the `counted_by`-like attribute
+    auto SL = NonAffixedBeginToken.getLocation();
+
+    // Now try to find the closing `)` of the attribute
+    auto MaybeRParenTok = FindRParenTokenAfter();
+    if (!MaybeRParenTok.has_value())
+      return Fallback;
+
+    return SourceRange(SL, MaybeRParenTok->getLocation());
+  }
+
+  // Try affixed version. E.g.:
+  //
+  // __attribute__((__counted_by__(count)))
+  //                ^              ^
+  //                |              |
+  //                ----------------
+  std::string AffixedTokenStr =
+      (llvm::Twine("__") + CATy->GetAttributeName(/*WithMacroPrefix=*/false) +
+       llvm::Twine("__"))
+          .str();
+  // +1 is for `(`
+  // +4 is for the 4 `_` characters
+  const ssize_t AffixedSkipCount =
+      CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1 + 4;
+  auto MaybeAffixedBeginToken =
+      FetchIdentifierTokenFromOffset(-AffixedSkipCount);
+  if (!MaybeAffixedBeginToken)
+    return Fallback;
+
+  auto AffixedBeginToken = MaybeAffixedBeginToken.value();
+  if (AffixedBeginToken.getRawIdentifier() != AffixedTokenStr)
+    return Fallback;
+
+  // Found the beginning of the `__counted_by__`-like like attribute.
+  auto SL = AffixedBeginToken.getLocation();
+  // Now try to find the closing `)` of the attribute
+  auto MaybeRParenTok = FindRParenTokenAfter();
+  if (!MaybeRParenTok.has_value())
+    return Fallback;
+
+  return SourceRange(SL, MaybeRParenTok->getLocation());
+}
+
+static void EmitIncompleteCountedByPointeeNotes(Sema &S,
+                                                const CountAttributedType *CATy,
+                                                NamedDecl *IncompleteTyDecl,
+                                                bool NoteAttrLocation = true) {
+  assert(IncompleteTyDecl == nullptr || isa<TypeDecl>(IncompleteTyDecl));
+
+  if (NoteAttrLocation) {
+    // Note where the attribute is declared
+    auto AttrSrcRange = S.BoundsSafetySourceRangeFor(CATy);
+    S.Diag(AttrSrcRange.getBegin(), diag::note_named_attribute)
+        << CATy->GetAttributeName(/*WithMacroPrefix=*/true) << AttrSrcRange;
+  }
+
+  if (!IncompleteTyDecl)
+    return;
+
+  // If there's an associated forward declaration display it to emphasize
+  // why the type is incomplete (all we have is a forward declaration).
+
+  // Note the `IncompleteTyDecl` type is the underlying type which might not
+  // be the same as `CATy->getPointeeType()` which could be a typedef.
+  //
+  // The diagnostic printed will be at the location of the underlying type but
+  // the diagnostic text will print the type of `CATy->getPointeeType()` which
+  // could be a typedef name rather than the underlying type. This is ok
+  // though because the diagnostic will print the underlying type name too.
+  // E.g:
+  //
+  // `forward declaration of 'Incomplete_Struct_t'
+  //  (aka 'struct IncompleteStructTy')`
+  //
+  // If this ends up being confusing we could emit a second diagnostic (one
+  // explaining where the typedef is) but that seems overly verbose.
+
+  S.Diag(IncompleteTyDecl->getBeginLoc(), diag::note_forward_declaration)
+      << CATy->getPointeeType();
+}
+
+static bool
+HasCountedByAttrOnIncompletePointee(QualType Ty, NamedDecl **ND,
+                                    const CountAttributedType **CATyOut,
+                                    QualType *PointeeTyOut) {
+  auto *CATy = Ty->getAs<CountAttributedType>();
+  if (!CATy)
+    return false;
+
+  // Incomplete pointee type is only a problem for
+  // counted_by/counted_by_or_null
+  if (CATy->isCountInBytes())
+    return false;
+
+  auto PointeeTy = CATy->getPointeeType();
+  if (PointeeTy.isNull())
+    return false; // Reachable?
+
+  if (!PointeeTy->isIncompleteType(ND))
+    return false;
+
+  if (CATyOut)
+    *CATyOut = CATy;
+  if (PointeeTyOut)
+    *PointeeTyOut = PointeeTy;
+  return true;
+}
+
+/// Perform Checks for assigning to a `__counted_by` or
+/// `__counted_by_or_null` pointer type \param LHSTy where the pointee type
+/// is incomplete which is invalid.
+///
+/// \param S The Sema instance.
+/// \param LHSTy The type being assigned to. Checks will only be performed if
+///              the type is a `counted_by` or `counted_by_or_null ` pointer.
+/// \param RHSExpr The expression being assigned from.
+/// \param Action The type assignment being performed
+/// \param Loc The SourceLocation to use for error diagnostics
+/// \param ComputeAssignee If provided this function will be called before
+///        emitting a diagnostic. The function should return the name of
+///        entity being assigned to or an empty string if this cannot be
+///        determined.
+///
+/// \returns True iff no diagnostic where emitted, false otherwise.
+static bool BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy(
+    Sema &S, QualType LHSTy, Expr *RHSExpr, Sema::AssignmentAction Action,
+    SourceLocation Loc, std::function<std::string()> ComputeAssignee) {
+  NamedDecl *IncompleteTyDecl = nullptr;
+  const CountAttributedType *CATy = nullptr;
+  QualType PointeeTy;
+  if (!HasCountedByAttrOnIncompletePointee(LHSTy, &IncompleteTyDecl, &CATy,
+                                           &PointeeTy))
+    return true;
+  assert(CATy && !CATy->isCountInBytes() && !PointeeTy.isNull());
+
+  // It's not expected that the diagnostic be emitted in these cases.
+  // It's not necessarily a problem but we should catch when this starts
+  // to happen.
+  assert(Action != Sema::AA_Converting && Action != Sema::AA_Sending &&
+         Action != Sema::AA_Casting && Action != Sema::AA_Passing_CFAudited);
+
+  // By having users provide a function we only pay the cost of allocation the
+  // string and computing when a diagnostic is emitted.
+  std::string Assignee = ComputeAssignee ? ComputeAssignee() : "";
+  {
+    auto D = S.Diag(Loc, diag::err_counted_by_on_incomplete_type_on_assign)
+             << /*0*/ (int)Action << /*1*/ Assignee
+             << /*2*/ (Assignee.size() > 0)
+             << /*3*/ isa<ImplicitValueInitExpr>(RHSExpr) << /*4*/ LHSTy
+             << /*5*/ CATy->GetAttributeName(/*WithMacroPrefix=*/true)
+             << /*6*/ PointeeTy << /*7*/ CATy->isOrNull();
+
+    if (RHSExpr->getSourceRange().isValid())
+      D << RHSExpr->getSourceRange();
+  }
+
+  EmitIncompleteCountedByPointeeNotes(S, CATy, IncompleteTyDecl);
+  return false; // check failed
+}
+
+bool Sema::BoundsSafetyCheckAssignmentToCountAttrPtr(
+    QualType LHSTy, Expr *RHSExpr, Sema::AssignmentAction Action,
+    SourceLocation Loc, std::function<std::string()> ComputeAssignee) {
+  return BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy(
+      *this, LHSTy, RHSExpr, Action, Loc, ComputeAssignee);
+}
+
+bool Sema::BoundsSafetyCheckInitialization(const InitializedEntity &Entity,
+                                           const InitializationKind &Kind,
+                                           Sema::AssignmentAction Action,
+                                           QualType LHSType, Expr *RHSExpr) {
+  bool ChecksPassed = true;
+  auto SL = Kind.getLocation();
+
+  // Note: We don't call `BoundsSafetyCheckAssignmentToCountAttrPtr` here
+  // because we need conditionalize what is checked. In downstream
+  // Clang `counted_by` is supported on variable definitions and in that
+  // implementation an error diagnostic will be emitted on the variable
+  // definition if the pointee is an incomplete type. To avoid warning about the
+  // same problem twice (once when the variable is defined, once when Sema
+  // checks the initializer) we skip checking the initializer if it's a
+  // variable.
+  if (Action == Sema::AA_Initializing &&
+      Entity.getKind() != InitializedEntity::EK_Variable) {
+
+    if (!BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy(
+            *this, LHSType, RHSExpr, Action, SL, [&Entity]() -> std::string {
+              if (const auto *VD =
+                      dyn_cast_or_null<ValueDecl>(Entity.getDecl())) {
+                return VD->getQualifiedNameAsString();
+              }
+              return "";
+            })) {
+      ChecksPassed = false;
+
+      // It's not necessarily bad if this assert fails but we should catch
+      // if this happens.
+      assert(Entity.getKind() == InitializedEntity::EK_Member);
+    }
+  }
+
+  return ChecksPassed;
+}
+
+static bool BoundsSafetyCheckUseOfCountAttrPtrWithIncompletePointeeTy(Sema &S,
+                                                                      Expr *E) {
+  QualType T = E->getType();
+  assert(T->isPointerType());
+
+  const CountAttributedType *CATy = nullptr;
+  QualType PointeeTy;
+  NamedDecl *IncompleteTyDecl = nullptr;
+  if (!HasCountedByAttrOnIncompletePointee(T, &IncompleteTyDecl, &CATy,
+                                           &PointeeTy))
+    return true;
+  assert(CATy && !CATy->isCountInBytes() && !PointeeTy.isNull());
+
+  // Generate a string for the diagnostic that describes the "use".
+  // The string is specialized for direct calls to produce a better
+  // diagnostic.
+  StringRef DirectCallFn;
+  std::string UseStr;
+  if (const auto *CE = dyn_cast<CallExpr>(E->IgnoreParens())) {
+    if (const auto *FD = CE->getDirectCallee()) {
+      DirectCallFn = FD->getName();
+    }
+  }
+  int SelectExprKind = DirectCallFn.size() > 0 ? 1 : 0;
+  if (SelectExprKind) {
+    UseStr = DirectCallFn;
+  } else {
+    llvm::raw_string_ostream SS(UseStr);
+    E->printPretty(SS, nullptr, PrintingPolicy(S.getPrintingPolicy()));
+  }
+  assert(UseStr.size() > 0);
+
+  S.Diag(E->getBeginLoc(), diag::err_counted_by_on_incomplete_type_on_use)
+      << /*0*/ SelectExprKind << /*1*/ UseStr << /*2*/ T << /*3*/ PointeeTy
+      << /*4*/ CATy->GetAttributeName(/*WithMacroPrefix=*/true)
+      << /*5*/ CATy->isOrNull() << E->getSourceRange();
----------------
delcypher wrote:

Is this just a style thing? I personally find numbering diagnostics that take a lot of arguments very useful when trying to match up the argument with the string in `DiagnosticSemaKinds.td`.

https://github.com/llvm/llvm-project/pull/106321


More information about the cfe-commits mailing list