https://github.com/rohanjr updated https://github.com/llvm/llvm-project/pull/178107
>From f027dbc33385ae9059e3dc1e1bc87b08fecef758 Mon Sep 17 00:00:00 2001 From: Rohan Jacob-Rao <[email protected]> Date: Tue, 27 Jan 2026 01:34:25 +0000 Subject: [PATCH 1/2] [Clang][-Wunsafe-buffer-usage] Allow safe form of libc memset. This allows a common, memory-safe form of memset, `memset(&x, val, sizeof(x))`. There are potentially other safe forms that we can allow in later PRs. The tests cover some of the possible cases. --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 77 ++++++++++++++++++- ...arn-unsafe-buffer-usage-libc-functions.cpp | 53 +++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 95316623f5d88..d889d4c0fabd6 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -800,6 +800,50 @@ static bool isNullTermPointer(const Expr *Ptr, ASTContext &Ctx) { return false; } +// Given an expression like `&x` or `std::addressof(x)`, returns the +// `DeclRefExpr` corresponding to `x`. Returns null if the expression `E` is not +// an address-of expression. +static const DeclRefExpr *getDeclRefInAddressOfExpr(const Expr *E) { + if (!E->getType()->isPointerType()) + return nullptr; + const Expr *Ptr = E->IgnoreParenImpCasts(); + + // `&x` where `x` is a DeclRefExpr. + if (const auto *UO = dyn_cast<UnaryOperator>(Ptr)) { + if (UO->getOpcode() != UnaryOperator::Opcode::UO_AddrOf) { + return nullptr; + } + return dyn_cast<DeclRefExpr>(UO->getSubExpr()->IgnoreParenImpCasts()); + } + + // `std::addressof(x)` where `x` is a DeclRefExpr. + if (const auto *CE = dyn_cast<CallExpr>(Ptr)) { + const FunctionDecl *FnDecl = CE->getDirectCallee(); + + if (!FnDecl || !FnDecl->isInStdNamespace() || + FnDecl->getNameAsString() != "addressof" || CE->getNumArgs() != 1) + return nullptr; + + return dyn_cast<DeclRefExpr>(CE->getArg(0)->IgnoreParenImpCasts()); + } + + return nullptr; +} + +// Given an expression like `sizeof(x)`, returns the `DeclRefExpr` corresponding +// to `x`. Returns null if the expression `E` is not a `sizeof` expression or is +// `sizeof(T)` for a type `T`. +static const DeclRefExpr *getDeclRefInSizeOfExr(const Expr *E) { + const auto *SizeOfExpr = + dyn_cast<UnaryExprOrTypeTraitExpr>(E->IgnoreParenImpCasts()); + if (!SizeOfExpr || SizeOfExpr->getKind() != UETT_SizeOf) + return nullptr; + if (SizeOfExpr->isArgumentType()) + return nullptr; + return dyn_cast<DeclRefExpr>( + SizeOfExpr->getArgumentExpr()->IgnoreParenImpCasts()); +} + namespace libc_func_matchers { // Under `libc_func_matchers`, define a set of matchers that match unsafe // functions in libc and unsafe calls to them. @@ -1057,7 +1101,6 @@ static bool isPredefinedUnsafeLibcFunc(const FunctionDecl &Node) { "wmemcpy", "memmove", "wmemmove", - "memset", "wmemset", // IO: "fread", @@ -1105,6 +1148,34 @@ static bool isPredefinedUnsafeLibcFunc(const FunctionDecl &Node) { return Name.ends_with("scanf"); } +// Returns true if this is an unsafe call to `memset`. +// The only call we currently consider safe is of the form +// `memset(&x, 0, sizeof(x))`, with possible variations in parentheses. +static bool isUnsafeMemset(const CallExpr &Node, ASTContext &Ctx) { + const FunctionDecl *FD = Node.getDirectCallee(); + assert(FD && "It should have been checked that FD is non-null."); + + const IdentifierInfo *II = FD->getIdentifier(); + if (!II || II->getName() != "memset") + return false; + + // If the number of parameters is unexpected, don't flag it as unsafe. + if (FD->getNumParams() < 3) + return false; + + // Now we have a real call to memset, it's considered unsafe unless it's in + // the form `memset(&x, 0, sizeof(x))`. + const DeclRefExpr *AddressOfVar = getDeclRefInAddressOfExpr(Node.getArg(0)); + if (!AddressOfVar) + return true; + + const DeclRefExpr *SizeOfVar = getDeclRefInSizeOfExr(Node.getArg(2)); + if (!SizeOfVar) + return true; + + return AddressOfVar->getDecl() != SizeOfVar->getDecl(); +} + // Match a call to one of the `v*printf` functions taking `va_list`. We cannot // check safety for these functions so they should be changed to their // non-va_list versions. @@ -2171,6 +2242,10 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget { Result.addNode(Tag, DynTypedNode::create(*CE)); return true; } + if (libc_func_matchers::isUnsafeMemset(*CE, Ctx)) { + Result.addNode(Tag, DynTypedNode::create(*CE)); + return true; + } if (libc_func_matchers::isUnsafeVaListPrintfFunc(*FD)) { Result.addNode(Tag, DynTypedNode::create(*CE)); Result.addNode(UnsafeVaListTag, DynTypedNode::create(*FD)); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp index 8a8d3bd77711b..712207dcd98f9 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -79,6 +79,10 @@ namespace std { typedef basic_string_view<char> string_view; typedef basic_string_view<wchar_t> wstring_view; + + + template <typename T> + T *addressof(T& arg); } void f(char * p, char * q, std::span<char> s, std::span<char> s2) { @@ -375,3 +379,52 @@ void test_format_attr_invalid_arg_idx(char * Str, std::string StdStr) { myprintf(Str); // expected-warning{{formatting function 'myprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} myprintf(StdStr.c_str()); } + + +void *memset(void *s, int c, size_t n); +void *memset(void *s, size_t n); // invalid memset missing value parameter + +void testSafeMemsetWithUnaryAddressOf() { + int i = 0; + + // considered memory-safe since it's not the known memset with 3 parameters + memset(&i, 10); + + // memory-safe and in the form currently considered safe + memset(&i, 0, sizeof(i)); + memset(&i, 0, sizeof i); + memset(&(i), 0, sizeof(i)); + memset(&(i), 0, sizeof i); + + // memory-safe but not one of the forms currently considered safe + memset(&i, 0, 0); // expected-warning{{function 'memset' is unsafe}} + memset(&i, 0, 1); // expected-warning{{function 'memset' is unsafe}} + memset(&i, 0, sizeof(int)); // expected-warning{{function 'memset' is unsafe}} + memset(&(++i), 0, sizeof(i)); // expected-warning{{function 'memset' is unsafe}} + + // unsafe + memset(nullptr, 0, 10); // expected-warning{{function 'memset' is unsafe}} + memset(&i, 0, 10); // expected-warning{{function 'memset' is unsafe}} +} + +void testSafeMemsetWithStdAddressof() { + int i = 0; + + // considered memory-safe since it's not the known memset with 3 parameters + memset(std::addressof(i), 10); + + // memory-safe and in the form currently considered safe + memset(std::addressof(i), 0, sizeof(i)); + memset(std::addressof(i), 0, sizeof i); + memset(std::addressof((i)), 0, sizeof(i)); + memset(std::addressof((i)), 0, sizeof i); + + // memory-safe but not one of the forms currently considered safe + memset(std::addressof(i), 0, 0); // expected-warning{{function 'memset' is unsafe}} + memset(std::addressof(i), 0, 1); // expected-warning{{function 'memset' is unsafe}} + memset(std::addressof(i), 0, sizeof(int)); // expected-warning{{function 'memset' is unsafe}} + memset(std::addressof(++i), 0, sizeof(i)); // expected-warning{{function 'memset' is unsafe}} + + // unsafe + memset(std::addressof(i), 0, 10); // expected-warning{{function 'memset' is unsafe}} +} >From 6d1ec2d95de1afeed20f9fb9a4864f7a52b61dc8 Mon Sep 17 00:00:00 2001 From: Rohan Jacob-Rao <[email protected]> Date: Tue, 27 Jan 2026 22:38:25 +0000 Subject: [PATCH 2/2] Refactor helper functions for use in IsPtrBufferSafe --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 38 +++++++++++------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index d889d4c0fabd6..4d35dec3c0578 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -800,48 +800,44 @@ static bool isNullTermPointer(const Expr *Ptr, ASTContext &Ctx) { return false; } -// Given an expression like `&x` or `std::addressof(x)`, returns the -// `DeclRefExpr` corresponding to `x`. Returns null if the expression `E` is not -// an address-of expression. -static const DeclRefExpr *getDeclRefInAddressOfExpr(const Expr *E) { +// Given an expression like `&X` or `std::addressof(X)`, returns the `Expr` +// corresponding to `X` (after removing parens and implicit casts). +// Returns null if the input expression `E` is not an address-of expression. +static const Expr *getSubExprInAddressOfExpr(const Expr *E) { if (!E->getType()->isPointerType()) return nullptr; const Expr *Ptr = E->IgnoreParenImpCasts(); - // `&x` where `x` is a DeclRefExpr. + // `&X` where `X` is an `Expr`. if (const auto *UO = dyn_cast<UnaryOperator>(Ptr)) { - if (UO->getOpcode() != UnaryOperator::Opcode::UO_AddrOf) { + if (UO->getOpcode() != UnaryOperator::Opcode::UO_AddrOf) return nullptr; - } - return dyn_cast<DeclRefExpr>(UO->getSubExpr()->IgnoreParenImpCasts()); + return UO->getSubExpr()->IgnoreParenImpCasts(); } - // `std::addressof(x)` where `x` is a DeclRefExpr. + // `std::addressof(X)` where `X` is an `Expr`. if (const auto *CE = dyn_cast<CallExpr>(Ptr)) { const FunctionDecl *FnDecl = CE->getDirectCallee(); - if (!FnDecl || !FnDecl->isInStdNamespace() || FnDecl->getNameAsString() != "addressof" || CE->getNumArgs() != 1) return nullptr; - - return dyn_cast<DeclRefExpr>(CE->getArg(0)->IgnoreParenImpCasts()); + return CE->getArg(0)->IgnoreParenImpCasts(); } return nullptr; } -// Given an expression like `sizeof(x)`, returns the `DeclRefExpr` corresponding -// to `x`. Returns null if the expression `E` is not a `sizeof` expression or is -// `sizeof(T)` for a type `T`. -static const DeclRefExpr *getDeclRefInSizeOfExr(const Expr *E) { +// Given an expression like `sizeof(X)`, returns the `Expr` corresponding to `X` +// (after removing parens and implicit casts). Returns null if the expression +// `E` is not a `sizeof` expression or is `sizeof(T)` for a type `T`. +static const Expr *getSubExprInSizeOfExpr(const Expr *E) { const auto *SizeOfExpr = dyn_cast<UnaryExprOrTypeTraitExpr>(E->IgnoreParenImpCasts()); if (!SizeOfExpr || SizeOfExpr->getKind() != UETT_SizeOf) return nullptr; if (SizeOfExpr->isArgumentType()) return nullptr; - return dyn_cast<DeclRefExpr>( - SizeOfExpr->getArgumentExpr()->IgnoreParenImpCasts()); + return SizeOfExpr->getArgumentExpr()->IgnoreParenImpCasts(); } namespace libc_func_matchers { @@ -1165,11 +1161,13 @@ static bool isUnsafeMemset(const CallExpr &Node, ASTContext &Ctx) { // Now we have a real call to memset, it's considered unsafe unless it's in // the form `memset(&x, 0, sizeof(x))`. - const DeclRefExpr *AddressOfVar = getDeclRefInAddressOfExpr(Node.getArg(0)); + const auto *AddressOfVar = dyn_cast_if_present<DeclRefExpr>( + getSubExprInAddressOfExpr(Node.getArg(0))); if (!AddressOfVar) return true; - const DeclRefExpr *SizeOfVar = getDeclRefInSizeOfExr(Node.getArg(2)); + const auto *SizeOfVar = + dyn_cast_if_present<DeclRefExpr>(getSubExprInSizeOfExpr(Node.getArg(2))); if (!SizeOfVar) return true; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
