[clang] [BoundsSafety][Sema] Allow counted_by and counted_by_or_null on pointers where the pointee type is incomplete but potentially completable (PR #106321)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 28 05:17:06 PDT 2024
================
@@ -186,4 +218,370 @@ bool Sema::CheckCountedByAttrOnField(FieldDecl *FD, Expr *E, bool CountInBytes,
return false;
}
+SourceRange Sema::BoundsSafetySourceRangeFor(const CountAttributedType *CATy) {
+ // Note: This implementation relies on `CountAttributedType` being unique.
+ // E.g.:
+ //
+ // struct Foo {
+ // int count;
+ // char* __counted_by(count) buffer;
+ // char* __counted_by(count) buffer2;
+ // };
+ //
+ // The types of `buffer` and `buffer2` are unique. The types being
+ // unique means the SourceLocation of the `counted_by` expression can be used
+ // to find where the attribute was written.
+
+ auto Fallback = CATy->getCountExpr()->getSourceRange();
+ auto CountExprBegin = CATy->getCountExpr()->getBeginLoc();
+
+ // FIXME: We currently don't support the count expression being a macro
+ // itself. E.g.:
+ //
+ // #define ZERO 0
+ // int* __counted_by(ZERO) x;
+ //
+ if (SourceMgr.isMacroBodyExpansion(CountExprBegin))
+ return Fallback;
+
+ auto FetchIdentifierTokenFromOffset =
+ [&](ssize_t Offset) -> std::optional<Token> {
+ SourceLocation OffsetLoc = CountExprBegin.getLocWithOffset(Offset);
+ Token Result;
+ if (Lexer::getRawToken(OffsetLoc, Result, SourceMgr, getLangOpts()))
+ return std::optional<Token>(); // Failed
+
+ if (!Result.isAnyIdentifier())
+ return std::optional<Token>(); // Failed
+
+ return Result; // Success
+ };
+
+ auto CountExprEnd = CATy->getCountExpr()->getEndLoc();
+ auto FindRParenTokenAfter = [&]() -> std::optional<Token> {
+ auto CountExprEndSpelling = SourceMgr.getSpellingLoc(CountExprEnd);
+ auto MaybeRParenTok = Lexer::findNextToken(
+ CountExprEndSpelling, getSourceManager(), getLangOpts());
+
+ if (!MaybeRParenTok.has_value())
+ return std::nullopt;
+
+ if (!MaybeRParenTok->is(tok::r_paren))
+ return std::nullopt;
+
+ return *MaybeRParenTok;
+ };
+
+ // Step back two characters to point at the last character of the attribute
+ // text.
+ //
+ // __counted_by(count)
+ // ^ ^
+ // | |
+ // ---
+ auto MaybeLastAttrCharToken = FetchIdentifierTokenFromOffset(-2);
+ if (!MaybeLastAttrCharToken)
+ return Fallback;
+
+ auto LastAttrCharToken = MaybeLastAttrCharToken.value();
+
+ if (LastAttrCharToken.getLength() > 1) {
+ // Special case: When the character is part of a macro the Token we get
+ // is the whole macro name (e.g. `__counted_by`).
+ if (LastAttrCharToken.getRawIdentifier() !=
+ CATy->GetAttributeName(/*WithMacroPrefix=*/true))
+ return Fallback;
+
+ // Found the beginning of the `__counted_by` macro
+ SourceLocation Begin = LastAttrCharToken.getLocation();
+ // Now try to find the closing `)` of the macro.
+ auto MaybeRParenTok = FindRParenTokenAfter();
+ if (!MaybeRParenTok.has_value())
+ return Fallback;
+
+ return SourceRange(Begin, MaybeRParenTok->getLocation());
+ }
+
+ assert(LastAttrCharToken.getLength() == 1);
+ // The Token we got back is just the last character of the identifier.
+ // This means a macro is not being used and instead the attribute is being
+ // used directly. We need to find the beginning of the identifier. We support
+ // two cases:
+ //
+ // * Non-affixed version. E.g: `counted_by`
+ // * Affixed version. E.g.: `__counted_by__`
+
+ // Try non-affixed version. E.g.:
+ //
+ // __attribute__((counted_by(count)))
+ // ^ ^
+ // | |
+ // ------------
+
+ // +1 is for `(`
+ const ssize_t NonAffixedSkipCount =
+ CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1;
+ auto MaybeNonAffixedBeginToken =
+ FetchIdentifierTokenFromOffset(-NonAffixedSkipCount);
+ if (!MaybeNonAffixedBeginToken)
+ return Fallback;
+
+ auto NonAffixedBeginToken = MaybeNonAffixedBeginToken.value();
+ if (NonAffixedBeginToken.getRawIdentifier() ==
+ CATy->GetAttributeName(/*WithMacroPrefix=*/false)) {
+ // Found the beginning of the `counted_by`-like attribute
+ auto SL = NonAffixedBeginToken.getLocation();
+
+ // Now try to find the closing `)` of the attribute
+ auto MaybeRParenTok = FindRParenTokenAfter();
+ if (!MaybeRParenTok.has_value())
+ return Fallback;
+
+ return SourceRange(SL, MaybeRParenTok->getLocation());
+ }
+
+ // Try affixed version. E.g.:
+ //
+ // __attribute__((__counted_by__(count)))
+ // ^ ^
+ // | |
+ // ----------------
+ std::string AffixedTokenStr =
+ (llvm::Twine("__") + CATy->GetAttributeName(/*WithMacroPrefix=*/false) +
+ llvm::Twine("__"))
+ .str();
+ // +1 is for `(`
+ // +4 is for the 4 `_` characters
+ const ssize_t AffixedSkipCount =
+ CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1 + 4;
+ auto MaybeAffixedBeginToken =
+ FetchIdentifierTokenFromOffset(-AffixedSkipCount);
+ if (!MaybeAffixedBeginToken)
+ return Fallback;
+
+ auto AffixedBeginToken = MaybeAffixedBeginToken.value();
+ if (AffixedBeginToken.getRawIdentifier() != AffixedTokenStr)
+ return Fallback;
+
+ // Found the beginning of the `__counted_by__`-like like attribute.
+ auto SL = AffixedBeginToken.getLocation();
+ // Now try to find the closing `)` of the attribute
+ auto MaybeRParenTok = FindRParenTokenAfter();
+ if (!MaybeRParenTok.has_value())
+ return Fallback;
+
+ return SourceRange(SL, MaybeRParenTok->getLocation());
+}
+
+static void EmitIncompleteCountedByPointeeNotes(Sema &S,
+ const CountAttributedType *CATy,
+ NamedDecl *IncompleteTyDecl,
+ bool NoteAttrLocation = true) {
+ assert(IncompleteTyDecl == nullptr || isa<TypeDecl>(IncompleteTyDecl));
+
+ if (NoteAttrLocation) {
+ // Note where the attribute is declared
+ auto AttrSrcRange = S.BoundsSafetySourceRangeFor(CATy);
+ S.Diag(AttrSrcRange.getBegin(), diag::note_named_attribute)
+ << CATy->GetAttributeName(/*WithMacroPrefix=*/true) << AttrSrcRange;
+ }
+
+ if (!IncompleteTyDecl)
+ return;
+
+ // If there's an associated forward declaration display it to emphasize
+ // why the type is incomplete (all we have is a forward declaration).
+
+ // Note the `IncompleteTyDecl` type is the underlying type which might not
+ // be the same as `CATy->getPointeeType()` which could be a typedef.
+ //
+ // The diagnostic printed will be at the location of the underlying type but
+ // the diagnostic text will print the type of `CATy->getPointeeType()` which
+ // could be a typedef name rather than the underlying type. This is ok
+ // though because the diagnostic will print the underlying type name too.
+ // E.g:
+ //
+ // `forward declaration of 'Incomplete_Struct_t'
+ // (aka 'struct IncompleteStructTy')`
+ //
+ // If this ends up being confusing we could emit a second diagnostic (one
+ // explaining where the typedef is) but that seems overly verbose.
+
+ S.Diag(IncompleteTyDecl->getBeginLoc(), diag::note_forward_declaration)
+ << CATy->getPointeeType();
+}
+
+static bool
+HasCountedByAttrOnIncompletePointee(QualType Ty, NamedDecl **ND,
+ const CountAttributedType **CATyOut,
+ QualType *PointeeTyOut) {
+ auto *CATy = Ty->getAs<CountAttributedType>();
+ if (!CATy)
+ return false;
+
+ // Incomplete pointee type is only a problem for
+ // counted_by/counted_by_or_null
+ if (CATy->isCountInBytes())
+ return false;
+
+ auto PointeeTy = CATy->getPointeeType();
+ if (PointeeTy.isNull())
+ return false; // Reachable?
+
+ if (!PointeeTy->isIncompleteType(ND))
+ return false;
+
+ if (CATyOut)
+ *CATyOut = CATy;
+ if (PointeeTyOut)
+ *PointeeTyOut = PointeeTy;
+ return true;
+}
+
+/// Perform Checks for assigning to a `__counted_by` or
+/// `__counted_by_or_null` pointer type \param LHSTy where the pointee type
+/// is incomplete which is invalid.
+///
+/// \param S The Sema instance.
+/// \param LHSTy The type being assigned to. Checks will only be performed if
+/// the type is a `counted_by` or `counted_by_or_null ` pointer.
+/// \param RHSExpr The expression being assigned from.
+/// \param Action The type assignment being performed
+/// \param Loc The SourceLocation to use for error diagnostics
+/// \param ComputeAssignee If provided this function will be called before
+/// emitting a diagnostic. The function should return the name of
+/// entity being assigned to or an empty string if this cannot be
+/// determined.
+///
+/// \returns True iff no diagnostic where emitted, false otherwise.
+static bool BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy(
+ Sema &S, QualType LHSTy, Expr *RHSExpr, Sema::AssignmentAction Action,
+ SourceLocation Loc, std::function<std::string()> ComputeAssignee) {
+ NamedDecl *IncompleteTyDecl = nullptr;
+ const CountAttributedType *CATy = nullptr;
+ QualType PointeeTy;
+ if (!HasCountedByAttrOnIncompletePointee(LHSTy, &IncompleteTyDecl, &CATy,
+ &PointeeTy))
+ return true;
+ assert(CATy && !CATy->isCountInBytes() && !PointeeTy.isNull());
+
+ // It's not expected that the diagnostic be emitted in these cases.
+ // It's not necessarily a problem but we should catch when this starts
+ // to happen.
+ assert(Action != Sema::AA_Converting && Action != Sema::AA_Sending &&
+ Action != Sema::AA_Casting && Action != Sema::AA_Passing_CFAudited);
+
+ // By having users provide a function we only pay the cost of allocation the
+ // string and computing when a diagnostic is emitted.
+ std::string Assignee = ComputeAssignee ? ComputeAssignee() : "";
+ {
+ auto D = S.Diag(Loc, diag::err_counted_by_on_incomplete_type_on_assign)
+ << /*0*/ (int)Action << /*1*/ Assignee
+ << /*2*/ (Assignee.size() > 0)
+ << /*3*/ isa<ImplicitValueInitExpr>(RHSExpr) << /*4*/ LHSTy
+ << /*5*/ CATy->GetAttributeName(/*WithMacroPrefix=*/true)
+ << /*6*/ PointeeTy << /*7*/ CATy->isOrNull();
----------------
Sirraide wrote:
```suggestion
<< (int)Action << Assignee
<< (Assignee.size() > 0)
<< isa<ImplicitValueInitExpr>(RHSExpr) << LHSTy
<< CATy->GetAttributeName(/*WithMacroPrefix=*/true)
<< PointeeTy << CATy->isOrNull();
```
I don’t think the numbers really add much of value here (it’s not that hard to count e.g. to the 5th parameter if you want to figure out what that’s being used for ;Þ)
https://github.com/llvm/llvm-project/pull/106321
More information about the cfe-commits
mailing list