llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tidy Author: Daniel Krupp (dkrupp) <details> <summary>Changes</summary> Adds a new check which warns for the usage of unbounded %s format string specifiers in the sprintf and scanf family functions, which may cause buffer overlfow. --- Patch is 24.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168691.diff 8 Files Affected: - (modified) clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp (+3) - (modified) clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt (+1) - (added) clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.cpp (+149) - (added) clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.h (+34) - (added) clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-format-string.rst (+73) - (added) clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unsafe-format-string/system-header-simulator.h (+57) - (added) clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-format-string.c (+243) - (added) clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-format-string.cpp (+58) ``````````diff diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp index 6859dc97c112a..15e8a6c3afb6a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -107,6 +107,7 @@ #include "UnintendedCharOstreamOutputCheck.h" #include "UniquePtrArrayMismatchCheck.h" #include "UnsafeFunctionsCheck.h" +#include "UnsafeFormatStringCheck.h" #include "UnusedLocalNonTrivialVariableCheck.h" #include "UnusedRaiiCheck.h" #include "UnusedReturnValueCheck.h" @@ -308,6 +309,8 @@ class BugproneModule : public ClangTidyModule { "bugprone-crtp-constructor-accessibility"); CheckFactories.registerCheck<UnsafeFunctionsCheck>( "bugprone-unsafe-functions"); + CheckFactories.registerCheck<UnsafeFormatStringCheck>( + "bugprone-unsafe-format-string"); CheckFactories.registerCheck<UnusedLocalNonTrivialVariableCheck>( "bugprone-unused-local-non-trivial-variable"); CheckFactories.registerCheck<UnusedRaiiCheck>("bugprone-unused-raii"); diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt index db1256d91d311..0e9439423ce5a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -108,6 +108,7 @@ add_clang_library(clangTidyBugproneModule STATIC UnhandledSelfAssignmentCheck.cpp UniquePtrArrayMismatchCheck.cpp UnsafeFunctionsCheck.cpp + UnsafeFormatStringCheck.cpp UnusedLocalNonTrivialVariableCheck.cpp UnusedRaiiCheck.cpp UnusedReturnValueCheck.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.cpp new file mode 100644 index 0000000000000..cd8f6b85ee1e2 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.cpp @@ -0,0 +1,149 @@ +//===--- UnsafeFormatStringCheck.cpp - clang-tidy -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UnsafeFormatStringCheck.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "llvm/Support/ConvertUTF.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::bugprone { + +UnsafeFormatStringCheck::UnsafeFormatStringCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + +void UnsafeFormatStringCheck::registerMatchers(MatchFinder *Finder) { + // Matches sprintf and scanf family functions in std namespace in C++ and + // globally in C. + auto VulnerableFunctions = + hasAnyName("sprintf", "vsprintf", "scanf", "fscanf", "sscanf", "vscanf", + "vfscanf", "vsscanf", "wscanf", "fwscanf", "swscanf", + "vwscanf", "vfwscanf", "vswscanf"); + Finder->addMatcher( + callExpr(callee(functionDecl(VulnerableFunctions, + anyOf(isInStdNamespace(), + hasParent(translationUnitDecl())))), + anyOf(hasArgument(0, stringLiteral().bind("format")), + hasArgument(1, stringLiteral().bind("format")))) + .bind("call"), + this); +} + +void UnsafeFormatStringCheck::check(const MatchFinder::MatchResult &Result) { + const auto *Call = Result.Nodes.getNodeAs<CallExpr>("call"); + const auto *Format = Result.Nodes.getNodeAs<StringLiteral>("format"); + + if (!Call || !Format) + return; + + std::string FormatString; + if (Format->getCharByteWidth() == 1) { + FormatString = Format->getString().str(); + } else if (Format->getCharByteWidth() == 2) { + // Handle wide strings by converting to narrow string for analysis + convertUTF16ToUTF8String(Format->getBytes(), FormatString); + } else if (Format->getCharByteWidth() == 4) { + // Handle wide strings by converting to narrow string for analysis + convertUTF32ToUTF8String(Format->getBytes(), FormatString); + } + + const auto *Callee = cast<FunctionDecl>(Call->getCalleeDecl()); + StringRef FunctionName = Callee->getName(); + + bool IsScanfFamily = FunctionName.contains("scanf"); + + if (!hasUnboundedStringSpecifier(FormatString, IsScanfFamily)) + return; + + auto Diag = diag(Call->getBeginLoc(), + IsScanfFamily + ? "format specifier '%%s' without field width may cause buffer overflow; consider using '%%Ns' where N limits input length" + : "format specifier '%%s' without precision may cause buffer overflow; consider using '%%.Ns' where N limits output length") + << Call->getSourceRange(); +} + +bool UnsafeFormatStringCheck::hasUnboundedStringSpecifier(StringRef Fmt, + bool IsScanfFamily) { + size_t Pos = 0; + size_t N = Fmt.size(); + while ((Pos = Fmt.find('%', Pos)) != StringRef::npos) { + if (Pos + 1 >= N) + break; + + // Skip %% + if (Fmt[Pos + 1] == '%') { + Pos += 2; + continue; + } + + size_t SpecPos = Pos + 1; + + // Skip flags + while (SpecPos < N && + (Fmt[SpecPos] == '-' || Fmt[SpecPos] == '+' || Fmt[SpecPos] == ' ' || + Fmt[SpecPos] == '#' || Fmt[SpecPos] == '0')) { + SpecPos++; + } + + // Check for field width + bool HasFieldWidth = false; + if (SpecPos < N && Fmt[SpecPos] == '*') { + HasFieldWidth = true; + SpecPos++; + } else { + while (SpecPos < N && isdigit(Fmt[SpecPos])) { + HasFieldWidth = true; + SpecPos++; + } + } + + // Check for precision + bool HasPrecision = false; + if (SpecPos < N && Fmt[SpecPos] == '.') { + SpecPos++; + if (SpecPos < N && Fmt[SpecPos] == '*') { + HasPrecision = true; + SpecPos++; + } else { + while (SpecPos < N && isdigit(Fmt[SpecPos])) { + HasPrecision = true; + SpecPos++; + } + } + } + + // Skip length modifiers + while (SpecPos < N && (Fmt[SpecPos] == 'h' || Fmt[SpecPos] == 'l' || + Fmt[SpecPos] == 'L' || Fmt[SpecPos] == 'z' || + Fmt[SpecPos] == 'j' || Fmt[SpecPos] == 't')) { + SpecPos++; + } + + // Check for 's' specifier + if (SpecPos < N && Fmt[SpecPos] == 's') { + if (IsScanfFamily) { + // For scanf family, field width provides protection + if (!HasFieldWidth) { + return true; + } + } else { + // For sprintf family, only precision provides protection + if (!HasPrecision) { + return true; + } + } + } + + Pos = SpecPos + 1; + } + + return false; +} + +} // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.h new file mode 100644 index 0000000000000..b61c7345a0484 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFormatStringCheck.h @@ -0,0 +1,34 @@ +//===--- UnsafeFormatStringCheck.h - clang-tidy ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::bugprone { + +/// Detects usage of vulnerable format string functions with unbounded %s +/// specifiers that can cause buffer overflows. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/unsafe-format-string.html +class UnsafeFormatStringCheck : public ClangTidyCheck { +public: + UnsafeFormatStringCheck(StringRef Name, ClangTidyContext *Context); + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + bool hasUnboundedStringSpecifier(StringRef FormatString, bool IsScanfFamily); + std::string getSafeAlternative(StringRef FunctionName); +}; + +} // namespace clang::tidy::bugprone + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFORMATSTRINGCHECK_H diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-format-string.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-format-string.rst new file mode 100644 index 0000000000000..9f7ee63fb57f9 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-format-string.rst @@ -0,0 +1,73 @@ +.. title:: clang-tidy - bugprone-unsafe-format-string + +bugprone-unsafe-format-string +============================== + +Detects usage of vulnerable format string functions with unbounded ``%s`` +specifiers that can cause buffer overflows. + +The check identifies calls to format string functions like ``sprintf``, ``scanf``, +and their variants that use ``%s`` format specifiers without proper limits. +This can lead to buffer overflow vulnerabilities when the input string is longer +than the destination buffer. + +Format Specifier Behavior +-------------------------- + +The check distinguishes between different function families: + +**scanf family functions**: Field width limits input length + - ``%s`` - unsafe (no limit) + - ``%99s`` - safe (reads at most 99 characters) + +**sprintf family functions**: Precision limits output length + - ``%s`` - unsafe (no limit) + - ``%99s`` - unsafe (minimum width, no maximum) + - ``%.99s`` - safe (outputs at most 99 characters) + - ``%10.99s`` - safe (minimum 10 chars, maximum 99 chars) + +Examples +-------- + +.. code-block:: c + + char buffer[100]; + const char* input = "user input"; + + // Unsafe sprintf usage + sprintf(buffer, "%s", input); // No limit + sprintf(buffer, "%99s", input); // Field width is minimum, not maximum + + // Safe sprintf usage + sprintf(buffer, "%.99s", input); // Precision limits to 99 chars + sprintf(buffer, "%10.99s", input); // Min 10, max 99 chars + + // Unsafe scanf usage + scanf("%s", buffer); // No limit + + // Safe scanf usage + scanf("%99s", buffer); // Field width limits to 99 chars + + // Safe alternative: use safer functions + snprintf(buffer, sizeof(buffer), "%s", input); + +Checked Functions +----------------- + +The check detects unsafe format strings in these functions: + +**sprintf family** (precision ``.N`` provides safety): +* ``sprintf``, ``vsprintf`` + +**scanf family** (field width ``N`` provides safety): +* ``scanf``, ``fscanf``, ``sscanf`` +* ``vscanf``, ``vfscanf``, ``vsscanf`` +* ``wscanf``, ``fwscanf``, ``swscanf`` +* ``vwscanf``, ``vfwscanf``, ``vswscanf`` + +Recommendations +--------------- + +* For ``sprintf`` family: Use precision specifiers (``%.Ns``) or ``snprintf`` +* For ``scanf`` family: Use field width specifiers (``%Ns``) +* Consider using safer string handling functions when possible diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unsafe-format-string/system-header-simulator.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unsafe-format-string/system-header-simulator.h new file mode 100644 index 0000000000000..fef2c10ae339a --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unsafe-format-string/system-header-simulator.h @@ -0,0 +1,57 @@ +#pragma clang system_header + +#ifdef __cplusplus +#define restrict /*restrict*/ +#endif + +#ifndef __cplusplus +typedef __WCHAR_TYPE__ wchar_t; +#endif + +typedef __typeof(sizeof(int)) size_t; +typedef long long __int64_t; +typedef __int64_t __darwin_off_t; +typedef __darwin_off_t fpos_t; +typedef int off_t; +typedef long ssize_t; + +typedef struct _FILE FILE; + +extern FILE *stdin; +extern FILE *stdout; +extern FILE *stderr; + +typedef __builtin_va_list va_list; +#define va_start(ap, param) __builtin_va_start(ap, param) +#define va_end(ap) __builtin_va_end(ap) +#define va_arg(ap, type) __builtin_va_arg(ap, type) +#define va_copy(dst, src) __builtin_va_copy(dst, src) + + +#ifdef __cplusplus +namespace std { +#endif +extern int fscanf ( FILE *restrict stream, const char *restrict format, ... ); +extern int scanf ( const char *restrict format, ... ); +extern int sscanf ( const char *restrict s, const char *restrict format, ...); +extern int vscanf( const char *restrict format, va_list vlist ); +extern int vfscanf ( FILE *restrict stream, const char *restrict format, va_list arg ); + +extern int vsscanf( const char *restrict buffer, const char *restrict format, va_list vlist ); +extern int vwscanf( const wchar_t* format, va_list vlist ); +extern int vfwscanf( FILE* stream, const wchar_t* format, va_list vlist ); +extern int vswscanf( const wchar_t* buffer, const wchar_t* format, va_list vlist ); +extern int swscanf (const wchar_t* ws, const wchar_t* format, ...); +extern int wscanf( const wchar_t *format, ... ); +extern int fwscanf( FILE *stream, const wchar_t *format, ... ); + +extern int printf( const char* format, ... ); +extern int sprintf( char* buffer, const char* format, ... ); +extern int vsprintf (char * s, const char * format, va_list arg ); +extern int vsnprintf (char * s, size_t n, const char * format, va_list arg ); +extern int fprintf( FILE* stream, const char* format, ... ); +extern int snprintf( char* restrict buffer, size_t bufsz, + const char* restrict format, ... ); +#ifdef __cplusplus +} //namespace std { +#endif \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-format-string.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-format-string.c new file mode 100644 index 0000000000000..2e3077ecab35a --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-format-string.c @@ -0,0 +1,243 @@ +// RUN: %check_clang_tidy %s bugprone-unsafe-format-string %t -- -- -isystem %S/Inputs/unsafe-format-string + +#include <system-header-simulator.h> + +void test_sprintf() { + char buffer[100]; + const char* input = "user input"; + + /* Positive: unsafe %s without field width */ + sprintf(buffer, "%s", input); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without precision may cause buffer overflow; consider using '%.Ns' where N limits output length [bugprone-unsafe-format-string] + + /* Positive: field width doesn't prevent overflow in sprintf */ + sprintf(buffer, "%99s", input); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without precision may cause buffer overflow; consider using '%.Ns' where N limits output length [bugprone-unsafe-format-string] + + /* Positive: dynamic field width doesn't prevent overflow */ + sprintf(buffer, "%*s", 10, input); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without precision may cause buffer overflow; consider using '%.Ns' where N limits output length [bugprone-unsafe-format-string] + + /*Negative: precision limits string length */ + sprintf(buffer, "%.99s", input); + /* no-warning */ + + /*Negative: precision with field width */ + sprintf(buffer, "%1.99s", input); + /* no-warning */ + + /*Negative: dynamic precision */ + sprintf(buffer, "%.*s", 99, input); + /* no-warning */ + + /*Negative: field width with dynamic precision */ + sprintf(buffer, "%1.*s", 99, input); + /* no-warning */ + + /*Negative: dynamic field width with fixed precision */ + sprintf(buffer, "%*.99s", 10, input); + /* no-warning */ + + /*Negative: dynamic field width and precision */ + sprintf(buffer, "%*.*s", 10, 99, input); + /* no-warning */ + + /*Negative: other format specifiers are safe */ + sprintf(buffer, "%d %f", 42, 3.14); + /* no-warning */ +} + +void test_vsprintf() { + char buffer[100]; + va_list args; + + /* Positive: unsafe %s without field width */ + vsprintf(buffer, "%s", args); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without precision may cause buffer overflow; consider using '%.Ns' where N limits output length [bugprone-unsafe-format-string] + + /* Positive: field width doesn't prevent overflow in vsprintf */ + vsprintf(buffer, "%99s", args); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without precision may cause buffer overflow; consider using '%.Ns' where N limits output length [bugprone-unsafe-format-string] + + /* Positive: precision limits string length */ + vsprintf(buffer, "%.99s", args); + /* no-warning */ +} + +void test_vsnprintf(int count, ...) { + va_list args; + va_start(args, count); + char buffer[100]; + + /*Negative: vsnprintf is safe */ + vsnprintf(buffer, sizeof(buffer), "%99s", args); + /* no-warning */ + + va_end(args); +} + +void test_scanf() { + char buffer[100]; + + /* Positive: unsafe %s without field width */ + scanf("%s", buffer); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without field width may cause buffer overflow; consider using '%Ns' where N limits input length [bugprone-unsafe-format-string] + + /*Negative: safe %s with field width */ + scanf("%99s", buffer); + /* no-warning */ +} + +void test_fscanf() { + char buffer[100]; + FILE* file = 0; + + /* Positive: unsafe %s without field width */ + fscanf(file, "%s", buffer); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without field width may cause buffer overflow; consider using '%Ns' where N limits input length [bugprone-unsafe-format-string] + + /*Negative: safe %s with field width */ + fscanf(file, "%99s", buffer); + /* no-warning */ +} + +void test_sscanf(char *source) { + char buffer[100]; + + /* Positive: unsafe %s without field width */ + sscanf(source, "%s", buffer); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without field width may cause buffer overflow; consider using '%Ns' where N limits input length [bugprone-unsafe-format-string] + + /*Negative: safe %s with field width */ + sscanf(source, "%99s", buffer); + /* no-warning */ +} + +void test_vfscanf() { + FILE* file = 0; + va_list args; + + /* Positive: unsafe %s without field width */ + vfscanf(file, "%s", args); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without field width may cause buffer overflow; consider using '%Ns' where N limits input length [bugprone-unsafe-format-string] + + /*Negative: safe %s with field width */ + vfscanf(file, "%99s", args); + /* no-warning */ +} + +void test_vsscanf(char * source) { + va_list args; + + /* Positive: unsafe %s without field width */ + vsscanf(source, "%s", args); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without field width may cause buffer overflow; consider using '%Ns' where N limits input length [bugprone-unsafe-format-string] + + /*Negative: safe %s with field width */ + vsscanf(source, "%99s", args); + /* no-warning */ +} + +void test_vscanf() { + va_list args; + + /* Positive: unsafe %s without field width */ + vscanf("%s", args); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without field width may cause buffer overflow; consider using '%Ns' where N limits input length [bugprone-unsafe-format-string] + + /*Negative: safe %s with field width */ + vscanf("%99s", args); + /* no-warning */ +} + +void test_wscanf() { + wchar_t buffer[100]; + + /* Positive: unsafe %s without field width */ + wscanf(L"%s", buffer); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: format specifier '%s' without field width may cause buffer overflow; consider using '%Ns' where N limits input length [bugprone-unsafe-format-string] + + /*Negative: safe %s with field width */ + wscanf(L"%99s", buffer); + /* no-warning */ +} + +void test_fwscanf() { + wchar_t buffer[100]; + FILE* file = 0; + + /* Positive: unsafe %s without field width */ + fwscanf(file, ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/168691 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
