serge-sans-paille created this revision. serge-sans-paille added a reviewer: erik.pilkington. serge-sans-paille added a project: clang. Herald added subscribers: cfe-commits, dexonsmith.
Implement a pessimistic evaluator of the minimal required size for a buffer based on the format string, and couple that with the fortified version to emit a warning when the buffer size is lower than the lower bound computed from the format string. See the test cases for examples of warnings, and https://github.com/serge-sans-paille/llvm-project/pull/6/checks for the cross-platform validation. Note: The lower bound could be improved, but I'd rather do that in an incremental commit, if that's okay with the reviewers. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D71566 Files: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Sema/SemaChecking.cpp clang/test/Sema/warn-fortify-source.c
Index: clang/test/Sema/warn-fortify-source.c =================================================================== --- clang/test/Sema/warn-fortify-source.c +++ clang/test/Sema/warn-fortify-source.c @@ -96,6 +96,21 @@ __builtin_vsnprintf(buf, 11, "merp", list); // expected-warning {{'vsnprintf' size argument is too large; destination buffer has size 10, but size argument is 11}} } +void call_sprintf_chk(char *buf) { + __builtin___sprintf_chk(buf, 1, 2, "a%d", 9); + __builtin___sprintf_chk(buf, 1, 1, "a%d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} + __builtin___sprintf_chk(buf, 1, 4, "a%#x", 9); + __builtin___sprintf_chk(buf, 1, 3, "a%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} + __builtin___sprintf_chk(buf, 1, 4, "a%p", (void *)9); + __builtin___sprintf_chk(buf, 1, 3, "a%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}} + __builtin___sprintf_chk(buf, 1, 3, "a%+d", 9); + __builtin___sprintf_chk(buf, 1, 2, "a%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}} + __builtin___sprintf_chk(buf, 1, 6, "a%5d", 9); + __builtin___sprintf_chk(buf, 1, 5, "a%5d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}} + __builtin___sprintf_chk(buf, 1, 2, "%f", 9.f); + __builtin___sprintf_chk(buf, 1, 1, "a%f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}} +} + #ifdef __cplusplus template <class> struct S { void mf() const { Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -308,6 +308,58 @@ return false; } +class EstimateSizeFormatHandler + : public analyze_format_string::FormatStringHandler { + size_t Size; + +public: + EstimateSizeFormatHandler(const StringLiteral *fexpr) + : Size(fexpr->getLength()) {} + + bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, + const char *startSpecifier, + unsigned specifierLen) override { + const analyze_format_string::OptionalAmount &FW = FS.getFieldWidth(); + if (FW.getHowSpecified() == analyze_format_string::OptionalAmount::Constant) + Size += FW.getConstantAmount(); + else + Size += FS.getConversionSpecifier().isAnyIntArg() || + FS.getConversionSpecifier().isDoubleArg(); + Size += FS.hasPlusPrefix() || FS.hasSpacePrefix(); + if (FS.hasAlternativeForm()) { + switch (FS.getConversionSpecifier().getKind()) { + default: + break; + case analyze_format_string::ConversionSpecifier::oArg: // leading 0 + case analyze_format_string::ConversionSpecifier::aArg: // force a . + case analyze_format_string::ConversionSpecifier::AArg: // force a . + case analyze_format_string::ConversionSpecifier::eArg: // force a . + case analyze_format_string::ConversionSpecifier::EArg: // force a . + case analyze_format_string::ConversionSpecifier::fArg: // force a . + case analyze_format_string::ConversionSpecifier::FArg: // force a . + case analyze_format_string::ConversionSpecifier::gArg: // force a . + case analyze_format_string::ConversionSpecifier::GArg: // force a . + Size += 1; + break; + case analyze_format_string::ConversionSpecifier::xArg: // leading 0x + case analyze_format_string::ConversionSpecifier::XArg: // leading 0x + Size += 2; + break; + } + } + switch (FS.getConversionSpecifier().getKind()) { + case analyze_format_string::ConversionSpecifier::pArg: // leading 0x + Size += 3; + break; + default: + break; + } + Size -= specifierLen; + return true; + } + + size_t getSizeLowerBound() const { return Size; } +}; /// Check a call to BuiltinID for buffer overflows. If BuiltinID is a /// __builtin_*_chk function, then use the object size argument specified in the @@ -328,10 +380,35 @@ unsigned DiagID = 0; bool IsChkVariant = false; + llvm::APSInt UsedSize = llvm::APSInt::get(-1); unsigned SizeIndex, ObjectIndex; switch (BuiltinID) { default: return; + case Builtin::BI__builtin___sprintf_chk: { + if (auto *StrE = dyn_cast<StringLiteral>( + TheCall->getArg(3)->IgnoreParenImpCasts())) { + EstimateSizeFormatHandler H(StrE); + StringRef StrRef = StrE->getString(); + const char *Str = StrRef.data(); + const ConstantArrayType *T = + Context.getAsConstantArrayType(StrE->getType()); + assert(T && "String literal not of constant array type!"); + size_t TypeSize = T->getSize().getZExtValue(); + size_t StrLen = + std::min(std::max(TypeSize, size_t(1)) - 1, StrRef.size()); + if (!analyze_format_string::ParsePrintfString( + H, Str, Str + StrLen, getLangOpts(), Context.getTargetInfo(), + false)) { + DiagID = diag::warn_fortify_source_format_overflow; + IsChkVariant = true; + UsedSize = H.getSizeLowerBound(); + ObjectIndex = 2; + break; + } + } + return; + } case Builtin::BI__builtin___memcpy_chk: case Builtin::BI__builtin___memmove_chk: case Builtin::BI__builtin___memset_chk: @@ -427,11 +504,13 @@ } // Evaluate the number of bytes of the object that this call will use. - Expr::EvalResult Result; - Expr *UsedSizeArg = TheCall->getArg(SizeIndex); - if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext())) - return; - llvm::APSInt UsedSize = Result.Val.getInt(); + if (UsedSize == -1) { + Expr::EvalResult Result; + Expr *UsedSizeArg = TheCall->getArg(SizeIndex); + if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext())) + return; + UsedSize = Result.Val.getInt(); + } if (UsedSize.ule(ObjectSize)) return; @@ -8867,6 +8946,7 @@ S.Context.getTargetInfo(), Type == Sema::FST_FreeBSDKPrintf)) H.DoneProcessing(); + } else if (Type == Sema::FST_Scanf) { CheckScanfHandler H(S, FExpr, OrigFormatExpr, Type, firstDataArg, numDataArgs, Str, HasVAListArg, Args, format_idx, Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -741,6 +741,12 @@ "'%0' size argument is too large; destination buffer has size %1," " but size argument is %2">, InGroup<FortifySource>; +def warn_fortify_source_format_overflow : Warning< + "'%0' will always overflow; destination buffer has size %1," + " but format string expands to at least %2">, + InGroup<FortifySource>; + + /// main() // static main() is not an error in C, just in C++. def warn_static_main : Warning<"'main' should not be declared static">,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits