[clang] [BoundsSafety][Sema] Allow counted_by and counted_by_or_null on pointers where the pointee type is incomplete but potentially completable (PR #106321)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 28 05:17:06 PDT 2024
================
@@ -186,4 +218,370 @@ bool Sema::CheckCountedByAttrOnField(FieldDecl *FD, Expr *E, bool CountInBytes,
return false;
}
+SourceRange Sema::BoundsSafetySourceRangeFor(const CountAttributedType *CATy) {
+ // Note: This implementation relies on `CountAttributedType` being unique.
+ // E.g.:
+ //
+ // struct Foo {
+ // int count;
+ // char* __counted_by(count) buffer;
+ // char* __counted_by(count) buffer2;
+ // };
+ //
+ // The types of `buffer` and `buffer2` are unique. The types being
+ // unique means the SourceLocation of the `counted_by` expression can be used
+ // to find where the attribute was written.
+
+ auto Fallback = CATy->getCountExpr()->getSourceRange();
+ auto CountExprBegin = CATy->getCountExpr()->getBeginLoc();
+
+ // FIXME: We currently don't support the count expression being a macro
+ // itself. E.g.:
+ //
+ // #define ZERO 0
+ // int* __counted_by(ZERO) x;
+ //
+ if (SourceMgr.isMacroBodyExpansion(CountExprBegin))
+ return Fallback;
+
+ auto FetchIdentifierTokenFromOffset =
+ [&](ssize_t Offset) -> std::optional<Token> {
+ SourceLocation OffsetLoc = CountExprBegin.getLocWithOffset(Offset);
+ Token Result;
+ if (Lexer::getRawToken(OffsetLoc, Result, SourceMgr, getLangOpts()))
+ return std::optional<Token>(); // Failed
+
+ if (!Result.isAnyIdentifier())
+ return std::optional<Token>(); // Failed
+
+ return Result; // Success
+ };
+
+ auto CountExprEnd = CATy->getCountExpr()->getEndLoc();
+ auto FindRParenTokenAfter = [&]() -> std::optional<Token> {
+ auto CountExprEndSpelling = SourceMgr.getSpellingLoc(CountExprEnd);
+ auto MaybeRParenTok = Lexer::findNextToken(
+ CountExprEndSpelling, getSourceManager(), getLangOpts());
+
+ if (!MaybeRParenTok.has_value())
+ return std::nullopt;
+
+ if (!MaybeRParenTok->is(tok::r_paren))
+ return std::nullopt;
+
+ return *MaybeRParenTok;
+ };
+
+ // Step back two characters to point at the last character of the attribute
+ // text.
+ //
+ // __counted_by(count)
+ // ^ ^
+ // | |
+ // ---
+ auto MaybeLastAttrCharToken = FetchIdentifierTokenFromOffset(-2);
+ if (!MaybeLastAttrCharToken)
+ return Fallback;
+
+ auto LastAttrCharToken = MaybeLastAttrCharToken.value();
+
+ if (LastAttrCharToken.getLength() > 1) {
+ // Special case: When the character is part of a macro the Token we get
+ // is the whole macro name (e.g. `__counted_by`).
+ if (LastAttrCharToken.getRawIdentifier() !=
+ CATy->GetAttributeName(/*WithMacroPrefix=*/true))
+ return Fallback;
+
+ // Found the beginning of the `__counted_by` macro
+ SourceLocation Begin = LastAttrCharToken.getLocation();
+ // Now try to find the closing `)` of the macro.
+ auto MaybeRParenTok = FindRParenTokenAfter();
+ if (!MaybeRParenTok.has_value())
+ return Fallback;
+
+ return SourceRange(Begin, MaybeRParenTok->getLocation());
+ }
+
+ assert(LastAttrCharToken.getLength() == 1);
+ // The Token we got back is just the last character of the identifier.
+ // This means a macro is not being used and instead the attribute is being
+ // used directly. We need to find the beginning of the identifier. We support
+ // two cases:
+ //
+ // * Non-affixed version. E.g: `counted_by`
+ // * Affixed version. E.g.: `__counted_by__`
+
+ // Try non-affixed version. E.g.:
+ //
+ // __attribute__((counted_by(count)))
+ // ^ ^
+ // | |
+ // ------------
+
+ // +1 is for `(`
+ const ssize_t NonAffixedSkipCount =
+ CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1;
+ auto MaybeNonAffixedBeginToken =
+ FetchIdentifierTokenFromOffset(-NonAffixedSkipCount);
+ if (!MaybeNonAffixedBeginToken)
+ return Fallback;
+
+ auto NonAffixedBeginToken = MaybeNonAffixedBeginToken.value();
+ if (NonAffixedBeginToken.getRawIdentifier() ==
+ CATy->GetAttributeName(/*WithMacroPrefix=*/false)) {
+ // Found the beginning of the `counted_by`-like attribute
+ auto SL = NonAffixedBeginToken.getLocation();
+
+ // Now try to find the closing `)` of the attribute
+ auto MaybeRParenTok = FindRParenTokenAfter();
+ if (!MaybeRParenTok.has_value())
+ return Fallback;
+
+ return SourceRange(SL, MaybeRParenTok->getLocation());
+ }
+
+ // Try affixed version. E.g.:
+ //
+ // __attribute__((__counted_by__(count)))
+ // ^ ^
+ // | |
+ // ----------------
+ std::string AffixedTokenStr =
+ (llvm::Twine("__") + CATy->GetAttributeName(/*WithMacroPrefix=*/false) +
+ llvm::Twine("__"))
+ .str();
+ // +1 is for `(`
+ // +4 is for the 4 `_` characters
+ const ssize_t AffixedSkipCount =
+ CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1 + 4;
+ auto MaybeAffixedBeginToken =
+ FetchIdentifierTokenFromOffset(-AffixedSkipCount);
+ if (!MaybeAffixedBeginToken)
+ return Fallback;
+
+ auto AffixedBeginToken = MaybeAffixedBeginToken.value();
+ if (AffixedBeginToken.getRawIdentifier() != AffixedTokenStr)
+ return Fallback;
+
+ // Found the beginning of the `__counted_by__`-like like attribute.
+ auto SL = AffixedBeginToken.getLocation();
+ // Now try to find the closing `)` of the attribute
+ auto MaybeRParenTok = FindRParenTokenAfter();
+ if (!MaybeRParenTok.has_value())
+ return Fallback;
+
+ return SourceRange(SL, MaybeRParenTok->getLocation());
+}
+
+static void EmitIncompleteCountedByPointeeNotes(Sema &S,
+ const CountAttributedType *CATy,
+ NamedDecl *IncompleteTyDecl,
+ bool NoteAttrLocation = true) {
+ assert(IncompleteTyDecl == nullptr || isa<TypeDecl>(IncompleteTyDecl));
+
+ if (NoteAttrLocation) {
+ // Note where the attribute is declared
+ auto AttrSrcRange = S.BoundsSafetySourceRangeFor(CATy);
+ S.Diag(AttrSrcRange.getBegin(), diag::note_named_attribute)
+ << CATy->GetAttributeName(/*WithMacroPrefix=*/true) << AttrSrcRange;
+ }
+
+ if (!IncompleteTyDecl)
+ return;
+
+ // If there's an associated forward declaration display it to emphasize
+ // why the type is incomplete (all we have is a forward declaration).
+
+ // Note the `IncompleteTyDecl` type is the underlying type which might not
+ // be the same as `CATy->getPointeeType()` which could be a typedef.
+ //
+ // The diagnostic printed will be at the location of the underlying type but
+ // the diagnostic text will print the type of `CATy->getPointeeType()` which
+ // could be a typedef name rather than the underlying type. This is ok
+ // though because the diagnostic will print the underlying type name too.
+ // E.g:
+ //
+ // `forward declaration of 'Incomplete_Struct_t'
+ // (aka 'struct IncompleteStructTy')`
+ //
+ // If this ends up being confusing we could emit a second diagnostic (one
+ // explaining where the typedef is) but that seems overly verbose.
+
+ S.Diag(IncompleteTyDecl->getBeginLoc(), diag::note_forward_declaration)
+ << CATy->getPointeeType();
+}
+
+static bool
+HasCountedByAttrOnIncompletePointee(QualType Ty, NamedDecl **ND,
+ const CountAttributedType **CATyOut,
+ QualType *PointeeTyOut) {
+ auto *CATy = Ty->getAs<CountAttributedType>();
+ if (!CATy)
+ return false;
+
+ // Incomplete pointee type is only a problem for
+ // counted_by/counted_by_or_null
+ if (CATy->isCountInBytes())
+ return false;
+
+ auto PointeeTy = CATy->getPointeeType();
+ if (PointeeTy.isNull())
+ return false; // Reachable?
+
+ if (!PointeeTy->isIncompleteType(ND))
+ return false;
+
+ if (CATyOut)
+ *CATyOut = CATy;
+ if (PointeeTyOut)
+ *PointeeTyOut = PointeeTy;
+ return true;
+}
+
+/// Perform Checks for assigning to a `__counted_by` or
+/// `__counted_by_or_null` pointer type \param LHSTy where the pointee type
+/// is incomplete which is invalid.
+///
+/// \param S The Sema instance.
+/// \param LHSTy The type being assigned to. Checks will only be performed if
+/// the type is a `counted_by` or `counted_by_or_null ` pointer.
+/// \param RHSExpr The expression being assigned from.
+/// \param Action The type assignment being performed
+/// \param Loc The SourceLocation to use for error diagnostics
+/// \param ComputeAssignee If provided this function will be called before
+/// emitting a diagnostic. The function should return the name of
+/// entity being assigned to or an empty string if this cannot be
+/// determined.
+///
+/// \returns True iff no diagnostic where emitted, false otherwise.
+static bool BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy(
+ Sema &S, QualType LHSTy, Expr *RHSExpr, Sema::AssignmentAction Action,
+ SourceLocation Loc, std::function<std::string()> ComputeAssignee) {
+ NamedDecl *IncompleteTyDecl = nullptr;
+ const CountAttributedType *CATy = nullptr;
+ QualType PointeeTy;
+ if (!HasCountedByAttrOnIncompletePointee(LHSTy, &IncompleteTyDecl, &CATy,
+ &PointeeTy))
+ return true;
+ assert(CATy && !CATy->isCountInBytes() && !PointeeTy.isNull());
+
+ // It's not expected that the diagnostic be emitted in these cases.
+ // It's not necessarily a problem but we should catch when this starts
+ // to happen.
+ assert(Action != Sema::AA_Converting && Action != Sema::AA_Sending &&
+ Action != Sema::AA_Casting && Action != Sema::AA_Passing_CFAudited);
+
+ // By having users provide a function we only pay the cost of allocation the
+ // string and computing when a diagnostic is emitted.
+ std::string Assignee = ComputeAssignee ? ComputeAssignee() : "";
+ {
+ auto D = S.Diag(Loc, diag::err_counted_by_on_incomplete_type_on_assign)
+ << /*0*/ (int)Action << /*1*/ Assignee
+ << /*2*/ (Assignee.size() > 0)
+ << /*3*/ isa<ImplicitValueInitExpr>(RHSExpr) << /*4*/ LHSTy
+ << /*5*/ CATy->GetAttributeName(/*WithMacroPrefix=*/true)
+ << /*6*/ PointeeTy << /*7*/ CATy->isOrNull();
+
+ if (RHSExpr->getSourceRange().isValid())
+ D << RHSExpr->getSourceRange();
+ }
+
+ EmitIncompleteCountedByPointeeNotes(S, CATy, IncompleteTyDecl);
+ return false; // check failed
+}
+
+bool Sema::BoundsSafetyCheckAssignmentToCountAttrPtr(
+ QualType LHSTy, Expr *RHSExpr, Sema::AssignmentAction Action,
+ SourceLocation Loc, std::function<std::string()> ComputeAssignee) {
+ return BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy(
+ *this, LHSTy, RHSExpr, Action, Loc, ComputeAssignee);
+}
+
+bool Sema::BoundsSafetyCheckInitialization(const InitializedEntity &Entity,
+ const InitializationKind &Kind,
+ Sema::AssignmentAction Action,
+ QualType LHSType, Expr *RHSExpr) {
+ bool ChecksPassed = true;
+ auto SL = Kind.getLocation();
+
+ // Note: We don't call `BoundsSafetyCheckAssignmentToCountAttrPtr` here
+ // because we need conditionalize what is checked. In downstream
+ // Clang `counted_by` is supported on variable definitions and in that
+ // implementation an error diagnostic will be emitted on the variable
+ // definition if the pointee is an incomplete type. To avoid warning about the
+ // same problem twice (once when the variable is defined, once when Sema
+ // checks the initializer) we skip checking the initializer if it's a
+ // variable.
+ if (Action == Sema::AA_Initializing &&
+ Entity.getKind() != InitializedEntity::EK_Variable) {
+
+ if (!BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy(
+ *this, LHSType, RHSExpr, Action, SL, [&Entity]() -> std::string {
+ if (const auto *VD =
+ dyn_cast_or_null<ValueDecl>(Entity.getDecl())) {
+ return VD->getQualifiedNameAsString();
+ }
+ return "";
+ })) {
+ ChecksPassed = false;
+
+ // It's not necessarily bad if this assert fails but we should catch
+ // if this happens.
+ assert(Entity.getKind() == InitializedEntity::EK_Member);
+ }
+ }
+
+ return ChecksPassed;
+}
+
+static bool BoundsSafetyCheckUseOfCountAttrPtrWithIncompletePointeeTy(Sema &S,
+ Expr *E) {
+ QualType T = E->getType();
+ assert(T->isPointerType());
+
+ const CountAttributedType *CATy = nullptr;
+ QualType PointeeTy;
+ NamedDecl *IncompleteTyDecl = nullptr;
+ if (!HasCountedByAttrOnIncompletePointee(T, &IncompleteTyDecl, &CATy,
+ &PointeeTy))
+ return true;
+ assert(CATy && !CATy->isCountInBytes() && !PointeeTy.isNull());
+
+ // Generate a string for the diagnostic that describes the "use".
+ // The string is specialized for direct calls to produce a better
+ // diagnostic.
+ StringRef DirectCallFn;
+ std::string UseStr;
+ if (const auto *CE = dyn_cast<CallExpr>(E->IgnoreParens())) {
+ if (const auto *FD = CE->getDirectCallee()) {
+ DirectCallFn = FD->getName();
+ }
+ }
+ int SelectExprKind = DirectCallFn.size() > 0 ? 1 : 0;
+ if (SelectExprKind) {
+ UseStr = DirectCallFn;
+ } else {
+ llvm::raw_string_ostream SS(UseStr);
+ E->printPretty(SS, nullptr, PrintingPolicy(S.getPrintingPolicy()));
+ }
+ assert(UseStr.size() > 0);
+
+ S.Diag(E->getBeginLoc(), diag::err_counted_by_on_incomplete_type_on_use)
+ << /*0*/ SelectExprKind << /*1*/ UseStr << /*2*/ T << /*3*/ PointeeTy
+ << /*4*/ CATy->GetAttributeName(/*WithMacroPrefix=*/true)
+ << /*5*/ CATy->isOrNull() << E->getSourceRange();
----------------
Sirraide wrote:
```suggestion
S.Diag(E->getBeginLoc(), diag::err_counted_by_on_incomplete_type_on_use)
<< SelectExprKind << UseStr << T << PointeeTy
<< CATy->GetAttributeName(/*WithMacroPrefix=*/true)
<< CATy->isOrNull() << E->getSourceRange();
```
https://github.com/llvm/llvm-project/pull/106321
More information about the cfe-commits
mailing list