hazohelet created this revision. hazohelet added reviewers: aaron.ballman, serge-sans-paille, nickdesaulniers, tbaeder. Herald added a project: All. hazohelet requested review of this revision. Herald added a project: clang.
This patch warns on `snprintf` calls whose `n` argument is known to be smaller than the size of the formatted string like char buf[5]; snprintf(buf, 5, "Hello"); This is a counterpart of gcc's `Wformat-truncation` Fixes https://github.com/llvm/llvm-project/issues/64871 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D158562 Files: clang/docs/ReleaseNotes.rst clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Sema/SemaChecking.cpp clang/test/Analysis/taint-generic.c clang/test/Sema/format-strings.c clang/test/Sema/warn-fortify-source.c
Index: clang/test/Sema/warn-fortify-source.c =================================================================== --- clang/test/Sema/warn-fortify-source.c +++ clang/test/Sema/warn-fortify-source.c @@ -87,6 +87,10 @@ char buf[10]; __builtin_snprintf(buf, 10, "merp"); __builtin_snprintf(buf, 11, "merp"); // expected-warning {{'snprintf' size argument is too large; destination buffer has size 10, but size argument is 11}} + __builtin_snprintf(buf, 0, "merp"); + __builtin_snprintf(buf, 3, "merp"); // expected-warning {{'snprintf' will always be truncated; specified size is 3, but format string expands to at least 5}} + __builtin_snprintf(buf, 4, "merp"); // expected-warning {{'snprintf' will always be truncated; specified size is 4, but format string expands to at least 5}} + __builtin_snprintf(buf, 5, "merp"); } void call_vsnprintf(void) { @@ -94,6 +98,10 @@ __builtin_va_list list; __builtin_vsnprintf(buf, 10, "merp", list); __builtin_vsnprintf(buf, 11, "merp", list); // expected-warning {{'vsnprintf' size argument is too large; destination buffer has size 10, but size argument is 11}} + __builtin_vsnprintf(buf, 0, "merp", list); + __builtin_vsnprintf(buf, 3, "merp", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 3, but format string expands to at least 5}} + __builtin_vsnprintf(buf, 4, "merp", list); // expected-warning {{'vsnprintf' will always be truncated; specified size is 4, but format string expands to at least 5}} + __builtin_vsnprintf(buf, 5, "merp", list); } void call_sprintf_chk(char *buf) { Index: clang/test/Sema/format-strings.c =================================================================== --- clang/test/Sema/format-strings.c +++ clang/test/Sema/format-strings.c @@ -202,7 +202,8 @@ printf("%s%lv%d","unix",10,20); // expected-warning {{invalid conversion specifier 'v'}} expected-warning {{data argument not used by format string}} fprintf(fp,"%%%l"); // expected-warning {{incomplete format specifier}} sprintf(buf,"%%%%%ld%d%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} - snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} expected-warning {{invalid conversion specifier ';'}} expected-warning {{data argument not used by format string}} + snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} expected-warning {{invalid conversion specifier ';'}} expected-warning {{data argument not used by format string}} \ + // expected-warning{{'snprintf' will always be truncated; specified size is 2, but format string expands to at least 7}} } void check_null_char_string(char* b) Index: clang/test/Analysis/taint-generic.c =================================================================== --- clang/test/Analysis/taint-generic.c +++ clang/test/Analysis/taint-generic.c @@ -229,6 +229,7 @@ char addr[128]; scanf("%s", addr); __builtin_snprintf(buffern, 10, "/bin/mail %s < /tmp/email", addr); + // expected-warning@-1 {{'snprintf' will always be truncated; specified size is 10, but format string expands to at least 24}} system(buffern); // expected-warning {{Untrusted data is passed to a system call}} } Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -1131,6 +1131,24 @@ // Add 1 for null byte. return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth); }; + auto ProcessFormatStringLiteral = + [&](const Expr *FormatExpr, StringRef &FormatStrRef, size_t &StrLen) { + if (auto *Format = dyn_cast<StringLiteral>(FormatExpr)) { + if (!Format->isOrdinary() && !Format->isUTF8()) + return false; + + FormatStrRef = Format->getString(); + const ConstantArrayType *T = + Context.getAsConstantArrayType(Format->getType()); + assert(T && "String literal not of constant array type!"); + size_t TypeSize = T->getSize().getZExtValue(); + // In case there's a null byte somewhere. + StrLen = + std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0)); + return true; + } + return false; + }; std::optional<llvm::APSInt> SourceSize; std::optional<llvm::APSInt> DestinationSize; @@ -1183,11 +1201,9 @@ const auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts(); - const auto *Format = dyn_cast<StringLiteral>(FormatExpr); - if (!Format) - return; - - if (!Format->isOrdinary() && !Format->isUTF8()) + StringRef FormatStrRef; + size_t StrLen; + if (!ProcessFormatStringLiteral(FormatExpr, FormatStrRef, StrLen)) return; auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize, @@ -1200,21 +1216,11 @@ << DestSize << SourceSize); }; - StringRef FormatStrRef = Format->getString(); auto ShiftedComputeSizeArgument = [&](unsigned Index) { return ComputeSizeArgument(Index + DataIndex); }; ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose); const char *FormatBytes = FormatStrRef.data(); - const ConstantArrayType *T = - Context.getAsConstantArrayType(Format->getType()); - assert(T && "String literal not of constant array type!"); - size_t TypeSize = T->getSize().getZExtValue(); - - // In case there's a null byte somewhere. - size_t StrLen = - std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0)); - analyze_format_string::ParseScanfString(H, FormatBytes, FormatBytes + StrLen, getLangOpts(), Context.getTargetInfo()); @@ -1230,22 +1236,11 @@ size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3; auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts(); - if (auto *Format = dyn_cast<StringLiteral>(FormatExpr)) { - - if (!Format->isOrdinary() && !Format->isUTF8()) - return; - - StringRef FormatStrRef = Format->getString(); + StringRef FormatStrRef; + size_t StrLen; + if (ProcessFormatStringLiteral(FormatExpr, FormatStrRef, StrLen)) { EstimateSizeFormatHandler H(FormatStrRef); const char *FormatBytes = FormatStrRef.data(); - const ConstantArrayType *T = - Context.getAsConstantArrayType(Format->getType()); - assert(T && "String literal not of constant array type!"); - size_t TypeSize = T->getSize().getZExtValue(); - - // In case there's a null byte somewhere. - size_t StrLen = - std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0)); if (!analyze_format_string::ParsePrintfString( H, FormatBytes, FormatBytes + StrLen, getLangOpts(), Context.getTargetInfo(), false)) { @@ -1327,6 +1322,26 @@ DiagID = diag::warn_fortify_source_size_mismatch; SourceSize = ComputeExplicitObjectSizeArgument(1); DestinationSize = ComputeSizeArgument(0); + const auto *FormatExpr = TheCall->getArg(/*Arg=*/2)->IgnoreParenImpCasts(); + StringRef FormatStrRef; + size_t StrLen; + if (SourceSize && + ProcessFormatStringLiteral(FormatExpr, FormatStrRef, StrLen)) { + EstimateSizeFormatHandler H(FormatStrRef); + const char *FormatBytes = FormatStrRef.data(); + if (!analyze_format_string::ParsePrintfString( + H, FormatBytes, FormatBytes + StrLen, getLangOpts(), + Context.getTargetInfo(), /*isFreeBSDKPrintf=*/false)) { + llvm::APSInt FormatSize = + llvm::APSInt::getUnsigned(H.getSizeLowerBound()) + .extOrTrunc(SizeTypeWidth); + if (FormatSize <= *SourceSize || *SourceSize == 0) + break; + DiagID = diag::warn_fortify_source_format_truncation; + DestinationSize = SourceSize; + SourceSize = FormatSize; + } + } break; } } Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -857,6 +857,11 @@ " but format string expands to at least %2">, InGroup<FortifySource>; +def warn_fortify_source_format_truncation: Warning< + "'%0' will always be truncated; specified size is %1," + " but format string expands to at least %2">, + InGroup<FortifySource>; + def warn_fortify_scanf_overflow : Warning< "'%0' may overflow; destination buffer in argument %1 has size " "%2, but the corresponding specifier may require size %3">, Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -144,6 +144,9 @@ tautologies like ``x && !x`` and ``!x || x`` in expressions. This also makes ``-Winfinite-recursion`` diagnose more cases. (`#56035: <https://github.com/llvm/llvm-project/issues/56035>`_). +- Clang's ``-Wfortify-source`` now diagnoses ``snprintf`` call that is known to + result in string truncation. + (`#64871: <https://github.com/llvm/llvm-project/issues/64871>`_). Bug Fixes in This Version -------------------------
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits