================ @@ -186,4 +218,370 @@ bool Sema::CheckCountedByAttrOnField(FieldDecl *FD, Expr *E, bool CountInBytes, return false; } +SourceRange Sema::BoundsSafetySourceRangeFor(const CountAttributedType *CATy) { + // Note: This implementation relies on `CountAttributedType` being unique. + // E.g.: + // + // struct Foo { + // int count; + // char* __counted_by(count) buffer; + // char* __counted_by(count) buffer2; + // }; + // + // The types of `buffer` and `buffer2` are unique. The types being + // unique means the SourceLocation of the `counted_by` expression can be used + // to find where the attribute was written. + + auto Fallback = CATy->getCountExpr()->getSourceRange(); + auto CountExprBegin = CATy->getCountExpr()->getBeginLoc(); + + // FIXME: We currently don't support the count expression being a macro + // itself. E.g.: + // + // #define ZERO 0 + // int* __counted_by(ZERO) x; + // + if (SourceMgr.isMacroBodyExpansion(CountExprBegin)) + return Fallback; + + auto FetchIdentifierTokenFromOffset = + [&](ssize_t Offset) -> std::optional<Token> { + SourceLocation OffsetLoc = CountExprBegin.getLocWithOffset(Offset); + Token Result; + if (Lexer::getRawToken(OffsetLoc, Result, SourceMgr, getLangOpts())) + return std::optional<Token>(); // Failed + + if (!Result.isAnyIdentifier()) + return std::optional<Token>(); // Failed + + return Result; // Success + }; + + auto CountExprEnd = CATy->getCountExpr()->getEndLoc(); + auto FindRParenTokenAfter = [&]() -> std::optional<Token> { + auto CountExprEndSpelling = SourceMgr.getSpellingLoc(CountExprEnd); + auto MaybeRParenTok = Lexer::findNextToken( + CountExprEndSpelling, getSourceManager(), getLangOpts()); + + if (!MaybeRParenTok.has_value()) + return std::nullopt; + + if (!MaybeRParenTok->is(tok::r_paren)) + return std::nullopt; + + return *MaybeRParenTok; + }; + + // Step back two characters to point at the last character of the attribute + // text. + // + // __counted_by(count) + // ^ ^ + // | | + // --- + auto MaybeLastAttrCharToken = FetchIdentifierTokenFromOffset(-2); + if (!MaybeLastAttrCharToken) + return Fallback; + + auto LastAttrCharToken = MaybeLastAttrCharToken.value(); + + if (LastAttrCharToken.getLength() > 1) { + // Special case: When the character is part of a macro the Token we get + // is the whole macro name (e.g. `__counted_by`). + if (LastAttrCharToken.getRawIdentifier() != + CATy->GetAttributeName(/*WithMacroPrefix=*/true)) + return Fallback; + + // Found the beginning of the `__counted_by` macro + SourceLocation Begin = LastAttrCharToken.getLocation(); + // Now try to find the closing `)` of the macro. + auto MaybeRParenTok = FindRParenTokenAfter(); + if (!MaybeRParenTok.has_value()) + return Fallback; + + return SourceRange(Begin, MaybeRParenTok->getLocation()); + } + + assert(LastAttrCharToken.getLength() == 1); + // The Token we got back is just the last character of the identifier. + // This means a macro is not being used and instead the attribute is being + // used directly. We need to find the beginning of the identifier. We support + // two cases: + // + // * Non-affixed version. E.g: `counted_by` + // * Affixed version. E.g.: `__counted_by__` + + // Try non-affixed version. E.g.: + // + // __attribute__((counted_by(count))) + // ^ ^ + // | | + // ------------ + + // +1 is for `(` + const ssize_t NonAffixedSkipCount = + CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1; + auto MaybeNonAffixedBeginToken = + FetchIdentifierTokenFromOffset(-NonAffixedSkipCount); + if (!MaybeNonAffixedBeginToken) + return Fallback; + + auto NonAffixedBeginToken = MaybeNonAffixedBeginToken.value(); + if (NonAffixedBeginToken.getRawIdentifier() == + CATy->GetAttributeName(/*WithMacroPrefix=*/false)) { + // Found the beginning of the `counted_by`-like attribute + auto SL = NonAffixedBeginToken.getLocation(); + + // Now try to find the closing `)` of the attribute + auto MaybeRParenTok = FindRParenTokenAfter(); + if (!MaybeRParenTok.has_value()) + return Fallback; + + return SourceRange(SL, MaybeRParenTok->getLocation()); + } + + // Try affixed version. E.g.: + // + // __attribute__((__counted_by__(count))) + // ^ ^ + // | | + // ---------------- + std::string AffixedTokenStr = + (llvm::Twine("__") + CATy->GetAttributeName(/*WithMacroPrefix=*/false) + + llvm::Twine("__")) + .str(); + // +1 is for `(` + // +4 is for the 4 `_` characters + const ssize_t AffixedSkipCount = + CATy->GetAttributeName(/*WithMacroPrefix=*/false).size() + 1 + 4; + auto MaybeAffixedBeginToken = + FetchIdentifierTokenFromOffset(-AffixedSkipCount); + if (!MaybeAffixedBeginToken) + return Fallback; + + auto AffixedBeginToken = MaybeAffixedBeginToken.value(); + if (AffixedBeginToken.getRawIdentifier() != AffixedTokenStr) + return Fallback; + + // Found the beginning of the `__counted_by__`-like like attribute. + auto SL = AffixedBeginToken.getLocation(); + // Now try to find the closing `)` of the attribute + auto MaybeRParenTok = FindRParenTokenAfter(); + if (!MaybeRParenTok.has_value()) + return Fallback; + + return SourceRange(SL, MaybeRParenTok->getLocation()); +} + +static void EmitIncompleteCountedByPointeeNotes(Sema &S, + const CountAttributedType *CATy, + NamedDecl *IncompleteTyDecl, + bool NoteAttrLocation = true) { + assert(IncompleteTyDecl == nullptr || isa<TypeDecl>(IncompleteTyDecl)); + + if (NoteAttrLocation) { + // Note where the attribute is declared + auto AttrSrcRange = S.BoundsSafetySourceRangeFor(CATy); + S.Diag(AttrSrcRange.getBegin(), diag::note_named_attribute) + << CATy->GetAttributeName(/*WithMacroPrefix=*/true) << AttrSrcRange; + } + + if (!IncompleteTyDecl) + return; + + // If there's an associated forward declaration display it to emphasize + // why the type is incomplete (all we have is a forward declaration). + + // Note the `IncompleteTyDecl` type is the underlying type which might not + // be the same as `CATy->getPointeeType()` which could be a typedef. + // + // The diagnostic printed will be at the location of the underlying type but + // the diagnostic text will print the type of `CATy->getPointeeType()` which + // could be a typedef name rather than the underlying type. This is ok + // though because the diagnostic will print the underlying type name too. + // E.g: + // + // `forward declaration of 'Incomplete_Struct_t' + // (aka 'struct IncompleteStructTy')` + // + // If this ends up being confusing we could emit a second diagnostic (one + // explaining where the typedef is) but that seems overly verbose. + + S.Diag(IncompleteTyDecl->getBeginLoc(), diag::note_forward_declaration) + << CATy->getPointeeType(); +} + +static bool +HasCountedByAttrOnIncompletePointee(QualType Ty, NamedDecl **ND, + const CountAttributedType **CATyOut, + QualType *PointeeTyOut) { + auto *CATy = Ty->getAs<CountAttributedType>(); + if (!CATy) + return false; + + // Incomplete pointee type is only a problem for + // counted_by/counted_by_or_null + if (CATy->isCountInBytes()) + return false; + + auto PointeeTy = CATy->getPointeeType(); + if (PointeeTy.isNull()) + return false; // Reachable? + + if (!PointeeTy->isIncompleteType(ND)) + return false; + + if (CATyOut) + *CATyOut = CATy; + if (PointeeTyOut) + *PointeeTyOut = PointeeTy; + return true; +} + +/// Perform Checks for assigning to a `__counted_by` or +/// `__counted_by_or_null` pointer type \param LHSTy where the pointee type +/// is incomplete which is invalid. +/// +/// \param S The Sema instance. +/// \param LHSTy The type being assigned to. Checks will only be performed if +/// the type is a `counted_by` or `counted_by_or_null ` pointer. +/// \param RHSExpr The expression being assigned from. +/// \param Action The type assignment being performed +/// \param Loc The SourceLocation to use for error diagnostics +/// \param ComputeAssignee If provided this function will be called before +/// emitting a diagnostic. The function should return the name of +/// entity being assigned to or an empty string if this cannot be +/// determined. +/// +/// \returns True iff no diagnostic where emitted, false otherwise. +static bool BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy( + Sema &S, QualType LHSTy, Expr *RHSExpr, Sema::AssignmentAction Action, + SourceLocation Loc, std::function<std::string()> ComputeAssignee) { + NamedDecl *IncompleteTyDecl = nullptr; + const CountAttributedType *CATy = nullptr; + QualType PointeeTy; + if (!HasCountedByAttrOnIncompletePointee(LHSTy, &IncompleteTyDecl, &CATy, + &PointeeTy)) + return true; + assert(CATy && !CATy->isCountInBytes() && !PointeeTy.isNull()); + + // It's not expected that the diagnostic be emitted in these cases. + // It's not necessarily a problem but we should catch when this starts + // to happen. + assert(Action != Sema::AA_Converting && Action != Sema::AA_Sending && + Action != Sema::AA_Casting && Action != Sema::AA_Passing_CFAudited); + + // By having users provide a function we only pay the cost of allocation the + // string and computing when a diagnostic is emitted. + std::string Assignee = ComputeAssignee ? ComputeAssignee() : ""; + { + auto D = S.Diag(Loc, diag::err_counted_by_on_incomplete_type_on_assign) + << /*0*/ (int)Action << /*1*/ Assignee + << /*2*/ (Assignee.size() > 0) + << /*3*/ isa<ImplicitValueInitExpr>(RHSExpr) << /*4*/ LHSTy + << /*5*/ CATy->GetAttributeName(/*WithMacroPrefix=*/true) + << /*6*/ PointeeTy << /*7*/ CATy->isOrNull(); + + if (RHSExpr->getSourceRange().isValid()) + D << RHSExpr->getSourceRange(); + } + + EmitIncompleteCountedByPointeeNotes(S, CATy, IncompleteTyDecl); + return false; // check failed +} + +bool Sema::BoundsSafetyCheckAssignmentToCountAttrPtr( + QualType LHSTy, Expr *RHSExpr, Sema::AssignmentAction Action, + SourceLocation Loc, std::function<std::string()> ComputeAssignee) { + return BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy( + *this, LHSTy, RHSExpr, Action, Loc, ComputeAssignee); +} + +bool Sema::BoundsSafetyCheckInitialization(const InitializedEntity &Entity, + const InitializationKind &Kind, + Sema::AssignmentAction Action, + QualType LHSType, Expr *RHSExpr) { + bool ChecksPassed = true; + auto SL = Kind.getLocation(); + + // Note: We don't call `BoundsSafetyCheckAssignmentToCountAttrPtr` here + // because we need conditionalize what is checked. In downstream + // Clang `counted_by` is supported on variable definitions and in that + // implementation an error diagnostic will be emitted on the variable + // definition if the pointee is an incomplete type. To avoid warning about the + // same problem twice (once when the variable is defined, once when Sema + // checks the initializer) we skip checking the initializer if it's a + // variable. + if (Action == Sema::AA_Initializing && + Entity.getKind() != InitializedEntity::EK_Variable) { + + if (!BoundsSafetyCheckAssignmentToCountAttrPtrWithIncompletePointeeTy( + *this, LHSType, RHSExpr, Action, SL, [&Entity]() -> std::string { + if (const auto *VD = + dyn_cast_or_null<ValueDecl>(Entity.getDecl())) { + return VD->getQualifiedNameAsString(); + } + return ""; + })) { + ChecksPassed = false; + + // It's not necessarily bad if this assert fails but we should catch + // if this happens. + assert(Entity.getKind() == InitializedEntity::EK_Member); + } + } + + return ChecksPassed; +} + +static bool BoundsSafetyCheckUseOfCountAttrPtrWithIncompletePointeeTy(Sema &S, ---------------- delcypher wrote:
My original thinking behind this was that `Sema::BoundsSafetyCheckUseOfCountAttrPtr()` would grow other time to check more things which is why things were separate out. However, for now it's fine to inline it and I can outline again if needed. https://github.com/llvm/llvm-project/pull/106321 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits