https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/175602
From 9c44f337a7ed22bb986aadb8107f1dfa905a0054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Mon, 8 Dec 2025 16:18:08 +0100 Subject: [PATCH 1/8] [analyzer] Add optin.core.UnconditionalVAArg Add a new optin checker which reports variadic functions that unconditionally use `va_arg()`. It would be undefined behavior to call such functions without passing any variadic arguments, so the SEI-CERT rule EXP47-C says that this pattern should be avoided. As this part of this SEI-CERT rule focuses on a very narrow area, I aimed to write a simple and stable checker that can report basic "clear" occurrences of this fault. It would be possible to cover some additional corner cases, but it isn't worth the effort. --- clang/docs/analyzer/checkers.rst | 42 ++++ .../clang/StaticAnalyzer/Checkers/Checkers.td | 5 + .../Core/BugReporter/BugReporter.h | 4 + .../StaticAnalyzer/Checkers/CMakeLists.txt | 1 + .../Checkers/UnconditionalVAArgChecker.cpp | 161 +++++++++++++++ .../StaticAnalyzer/Checkers/VAListChecker.cpp | 2 +- clang/test/Analysis/unconditional-va_arg.c | 183 ++++++++++++++++++ .../lib/StaticAnalyzer/Checkers/BUILD.gn | 1 + 8 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp create mode 100644 clang/test/Analysis/unconditional-va_arg.c diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 31edf9e99dc7d..a6ce509f9e317 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -842,6 +842,48 @@ of this Clang attribute. Projects that use this pattern should not enable this optin checker. +.. _optin-core-UnconditionalVAArg: + +optin.core.UnconditionalVAArg (C, C++) +"""""""""""""""""""""""""""""""""""""" +Check for variadic functions that unconditionally use ``va_arg()``. It is +possible to use such functions safely (as long as they always receive at least +one variadic argument), but their careless use can lead to undefined behavior +(trying to access a variadic argument when there isn't any), so it is better to +avoid them. + +**Note:** This checker only inspects the *definition* of the variadic functions +and reports those that *would* fail if they were called with no variadic +arguments -- even if there is no such call in the codebase. + +This design rule is dictated by the SEI CERT rule `EXP47-C +<https://wiki.sei.cmu.edu/confluence/display/c/EXP47-C.+Do+not+call+va_arg+with+an+argument+of+the+incorrect+type>`_, +which describes several issues related to the use of ``va_arg()``. (The problem +reported by this checker is shown in the second code example; the first, +unrelated code example is covered by the clang diagnostic ``-Wvarargs``.) + +.. code-block:: cpp + + // This function expects a list of variadic arguments terminated by a NULL pointer. + void log_message(const char *msg, ...) { + va_list va; + const char *arg; + printf("%s\n", msg); + va_start(va, msg); + while ((arg = va_arg(va, const char *))) { + // warn: calls to 'log_message' always reach this va_arg() expression + printf(" * %s\n", arg); + } + va_end(va); + } + +Instead of this bugprone pattern, the SEI-CERT coding standard suggests +approaches where the number of variadic arguments can be specified in a +different manner -- for example, it can be encoded in the initial parameter +like ``void foo(size_t num_varargs, ...)``. Those approaches are still not +foolproof (e.g. ``foo(3, "a")`` will be undefined behavior), but they reduce +the chances of an accidental mistake. + .. _optin-cplusplus-UninitializedObject: optin.cplusplus.UninitializedObject (C++) diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index ffae3b9310979..051b0033a6f96 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -416,6 +416,11 @@ def EnumCastOutOfRangeChecker : Checker<"EnumCastOutOfRange">, HelpText<"Check integer to enumeration casts for out of range values">, Documentation<HasDocumentation>; +def UnconditionalVAArgChecker + : Checker<"UnconditionalVAArg">, + HelpText<"Check variadic functions unconditionally using va_arg">, + Documentation<HasDocumentation>; + } // end "optin.core" //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h index f6a023368f3d2..3546e203b715f 100644 --- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h +++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h @@ -256,6 +256,10 @@ class BasicBugReport : public BugReport { BasicBugReport(const BugType &bt, StringRef desc, PathDiagnosticLocation l) : BugReport(Kind::Basic, bt, desc), Location(l) {} + BasicBugReport(const BugType &BT, StringRef ShortDesc, StringRef Desc, + PathDiagnosticLocation L) + : BugReport(Kind::Basic, BT, ShortDesc, Desc), Location(L) {} + static bool classof(const BugReport *R) { return R->getKind() == Kind::Basic; } diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt index 29d2c4512d470..6c5e4b72cc082 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -115,6 +115,7 @@ add_clang_library(clangStaticAnalyzerCheckers TraversalChecker.cpp TrustNonnullChecker.cpp TrustReturnsNonnullChecker.cpp + UnconditionalVAArgChecker.cpp UndefBranchChecker.cpp UndefCapturedBlockVarChecker.cpp UndefResultChecker.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp new file mode 100644 index 0000000000000..dc8549d1ec960 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp @@ -0,0 +1,161 @@ +//== UnconditionalVAArgChecker.cpp ------------------------------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines a checker to detect functions that unconditionally call +// va_arg() and would fail if they were called with zero variadic arguments. +// +// This checker is only partially path-sensitive: it relies on the symbolic +// execution of the analyzer engine to follow the execution path from the +// beginning of a function to a va_arg() call and determine whether there are +// any branching points on that path -- but it produces BasicBug reports +// because report path wouldn't contain any useful information. (As this +// checker detects a property of a certain function, the path before that +// function is irrelevant; and the unconditional path within the function is +// trivial and wouldn't have any note tags anyway.) +// +// I also attempted to implement this checker in Clang Tidy, but the AST +// matching is simply not powerful enough to express this "no branching on the +// path" relationship -- I would have needed to reinvent the wheel. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace clang; +using namespace ento; +using llvm::formatv; + +/// Either nullptr or a function under symbolic execution; a non-null value +/// means that the analyzer didn't see any branching points from the beginning +/// of that function until the current location. +REGISTER_TRAIT_WITH_PROGRAMSTATE(HasUnconditionalPath, const FunctionDecl *); + +namespace { +class UnconditionalVAArgChecker + : public Checker<check::BeginFunction, check::EndFunction, + check::BranchCondition, check::PreStmt<VAArgExpr>> { + const BugType BT{this, "Unconditional use of va_arg()", + categories::MemoryError}; + + static const FunctionDecl *getCurrentFunction(CheckerContext &C); + +public: + void checkBeginFunction(CheckerContext &C) const; + void checkEndFunction(const ReturnStmt *, CheckerContext &C) const; + void checkBranchCondition(const Stmt *Condition, CheckerContext &C) const; + void checkPreStmt(const VAArgExpr *VAA, CheckerContext &C) const; +}; +} // end anonymous namespace + +const FunctionDecl * +UnconditionalVAArgChecker::getCurrentFunction(CheckerContext &C) { + const Decl *FD = C.getLocationContext()->getDecl(); + return dyn_cast_if_present<FunctionDecl>(FD); +} + +void UnconditionalVAArgChecker::checkBeginFunction(CheckerContext &C) const { + // We only look for unconditional va_arg() use in variadic functions. + // Functions that take a va_list argument are just parts of the argument + // handling, it is more natural for them to have preconditions. + const FunctionDecl *FD = getCurrentFunction(C); + if (FD && FD->isVariadic()) { + // If a variadic function is inlined in the body of another variadic + // function, this overwrites the path tracking for the outer function. As + // this situation is fairly rare and it is very unlikely that the "big" + // outer function still has an unconditional path, there is no need to + // write more complex logic that handles this. + C.addTransition(C.getState()->set<HasUnconditionalPath>(FD)); + } +} + +void UnconditionalVAArgChecker::checkEndFunction(const ReturnStmt *, + CheckerContext &C) const { + // This callback is just for the sake of cleanliness, to remove data from the + // State after it becomes irrelevant. This checker would function perfectly + // correctly without this callback, and the impact on other checkers is also + // extremely limited (presence of extra metadata might prevent the + // unification of execution paths in some very rare situations). + ProgramStateRef State = C.getState(); + const FunctionDecl *FD = getCurrentFunction(C); + if (FD && FD == State->get<HasUnconditionalPath>()) { + State = State->set<HasUnconditionalPath>(nullptr); + C.addTransition(State); + } +} + +void UnconditionalVAArgChecker::checkBranchCondition(const Stmt *Condition, + CheckerContext &C) const { + // After evaluating a branch condition, the analyzer (which examines + // execution paths individually) won't be able to conclude that the function + // _unconditionally_ uses va_arg() -- so this callback resets the field + // HasUnconditionalPath in the state. + // NOTES: + // 1. This is the right thing to do even if the analyzer sees that _in the + // current state_ the execution can only continue in one direction. For + // example, if the variadic function isn't the entrypont, then the parameters + // recieved from the caller may guarantee that va_arg() is uses -- but this + // does not mean that the function _unconditionally_ uses va_arg(). + // 2. After other kinds of state splits (e.g. EagerlyAssueme, callbacks of + // StdLibraryFunctions separating different cases for the behavior of a + // library function etc.) the different execution paths will follow the same + // code (until they hit a branch condition), so it is reasonable (although + // not always correct) to assume that a va_arg() reached after those state + // splits is still _unconditionally_ reached if there were no branching + // statements. + // 3. This checker activates _after_ the evaluation of the branch condition, + // so va_arg() in the branch condition can be unconditionally reached. + C.addTransition(C.getState()->set<HasUnconditionalPath>(nullptr)); +} + +void UnconditionalVAArgChecker::checkPreStmt(const VAArgExpr *VAA, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const FunctionDecl *FD = getCurrentFunction(C); + if (FD && FD == State->get<HasUnconditionalPath>()) { + // Reset this field in the state to ensure that multiple consecutive + // va_arg() calls don't produce repeated warnings. + C.addTransition(State->set<HasUnconditionalPath>(nullptr)); + + IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return; + StringRef FN = II->getName(); + + std::string FullMsg = formatv( + "Calls to '{0}' always reach this va_arg() expression, so calling " + "'{0}' with no variadic arguments would be undefined behavior", + FN); + SourceRange SR = VAA->getSourceRange(); + PathDiagnosticLocation PDL(SR.getBegin(), + C.getASTContext().getSourceManager()); + + // We create a non-path-sensitive report because the path wouldn't contain + // any useful information: the path leading to this function is actively + // ignored by the checker and the path within the function is trivial. + auto R = + std::make_unique<BasicBugReport>(BT, BT.getDescription(), FullMsg, PDL); + R->addRange(SR); + R->setDeclWithIssue(FD); + C.emitReport(std::move(R)); + } +} + +void ento::registerUnconditionalVAArgChecker(CheckerManager &Mgr) { + Mgr.registerChecker<UnconditionalVAArgChecker>(); +} + +bool ento::shouldRegisterUnconditionalVAArgChecker(const CheckerManager &) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp index 503fa5de868f2..e81c8bfa94cb1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This defines checkers which detect usage of uninitialized va_list values +// This defines a checker which detects usage of uninitialized va_list values // and va_start calls with no matching va_end. // //===----------------------------------------------------------------------===// diff --git a/clang/test/Analysis/unconditional-va_arg.c b/clang/test/Analysis/unconditional-va_arg.c new file mode 100644 index 0000000000000..2f6e2875df021 --- /dev/null +++ b/clang/test/Analysis/unconditional-va_arg.c @@ -0,0 +1,183 @@ +// RUN: %clang_analyze_cc1 -triple hexagon-unknown-linux -verify %s \ +// RUN: -analyzer-checker=core,optin.core.UnconditionalVAArg \ +// RUN: -analyzer-disable-checker=core.CallAndMessage \ +// RUN: -analyzer-output=text +// +// RUN: %clang_analyze_cc1 -triple x86_64-pc-linux-gnu -verify %s \ +// RUN: -analyzer-checker=core,optin.core.UnconditionalVAArg \ +// RUN: -analyzer-disable-checker=core.CallAndMessage \ +// RUN: -analyzer-output=text + +#include "Inputs/system-header-simulator-for-valist.h" +int printf(const char *format, ...); +void abort(void); + +void log_message(const char *msg, ...) { + // This artifical but plausible example function expects that the list of + // variadic arguments is terminated by a NULL pointer. Although careful use + // of this function is well-defined, passing no variadic arguments would + // trigger undefined behavior, so it may be better to choose a different way + // to mark the count of variadic arguments. + va_list va; + const char *arg; + printf("%s\n", msg); + va_start(va, msg); + while ((arg = va_arg(va, const char *))) { + // expected-warning@-1 {{Unconditional use of va_arg()}} + // expected-note@-2 {{Calls to 'log_message' always reach this va_arg() expression, so calling 'log_message' with no variadic arguments would be undefined behavior}} + printf(" * %s\n", arg); + } + va_end(va); +} + +void simple(int fst, ...) { + // This function is not called in this test file. + va_list va; + va_start(va, fst); + (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'simple' always reach this va_arg() expression, so calling 'simple' with no variadic arguments would be undefined behavior}} + va_end(va); +} + +void used_with_varargs(int fst, ...) { + // This function is identical to simple(), but it is used and the call passes + // several variadic arguments. + va_list va; + va_start(va, fst); + (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'used_with_varargs' always reach this va_arg() expression, so calling 'used_with_varargs' with no variadic arguments would be undefined behavior}} + va_end(va); +} + +void caller1(void) { + used_with_varargs(1, 2, 3, 4); +} + +void used_without_varargs(int fst, ...) { + // This function is identical to simple(), but it is used and the call passes + // no variadic arguments. + va_list va; + va_start(va, fst); + (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'used_without_varargs' always reach this va_arg() expression, so calling 'used_without_varargs' with no variadic arguments would be undefined behavior}} + // FIXME: Here the checker should mention that this function is _actually_ + // called with no variadic arguments. + va_end(va); +} + +void caller2(void) { + used_without_varargs(1); +} + +void multiple_va_arg_calls(int fst, ...) { + // This function is similar to simple() but calls va_arg() multiple times. To + // avoid spamming the user, only the first use is reported. + va_list va; + va_start(va, fst); + (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'multiple_va_arg_calls' always reach this va_arg() expression, so calling 'multiple_va_arg_calls' with no variadic arguments would be undefined behavior}} + (void)va_arg(va, int); // no-warning + (void)va_arg(va, int); // no-warning + va_end(va); +} + + +void has_conditional_return(int fst, ...) { + // The va_arg call is not always executed, so don't report this function. + // Actually understanding the conditional logic is infeasible, so the checker + // should accept any conditional logic that sometimes avoids the va_arg(). + va_list va; + if (fst < 0) + return; + va_start(va, fst); + (void)va_arg(va, int); // no-warning + va_end(va); +} + +void has_conditional_logic(int fst, ...) { + // As static analyzer only follows one execution path, it cannot see that + // va_arg() is always executed in this function. Theoretically it would be + // better to produce a report here, but implementing this would be a very + // ineffective use of development effort. + va_list va; + if (fst < 0) + fst = -fst; + va_start(va, fst); + (void)va_arg(va, int); // no-warning + va_end(va); +} + +void caller_has_conditional_logic(int fst, ...) { + // This function is identical to simple(), but it is used by a function that + // performs a state split before the call. + // This test validates that state splits _before_ the variadic call (which + // are irrelevant) do not influence the report creation. This is basically a + // workaround for the fact that not all functions are entrypoints and before + // reaching "our" variadic function the analyzer may go through arbitrary + // irrelevant code. + va_list va; + va_start(va, fst); + (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'caller_has_conditional_logic' always reach this va_arg() expression, so calling 'caller_has_conditional_logic' with no variadic arguments would be undefined behavior}} + va_end(va); +} + +void caller_with_conditional_logic(int flag) { + if (flag) + caller_has_conditional_logic(1, 2, 3); +} + + +void validate_argument(int fst) { + if (fst <= 0) { + printf("Negative first argument is illegal: %d!\n", fst); + abort(); + } +} + +void noreturn_in_function(int fst, ...) { + // This function is very similar to simple(), but calls a function which may + // call the noreturn function abort() before reaching the va_arg() call, so + // the checker doesn't produce a warning. This is important e.g. for cases + // when a function uses assertions to mark preconditions. + va_list va; + validate_argument(fst); + va_start(va, fst); + (void)va_arg(va, int); // no-warning + va_end(va); +} + +void print_negative(int fst) { + if (fst <= 0) { + printf("First argument is negative: %d!\n", fst); + } +} + +void conditional_in_function(int fst, ...) { + // This function is very similar to noreturn_in_function(), but the called + // function always returns. However, the analyzer doesn't see this difference + // (because it follows a _single_ execution path and cannot see the + // alternative) so the checker behaves as in noreturn_in_function. + va_list va; + print_negative(fst); + va_start(va, fst); + (void)va_arg(va, int); // no-warning + va_end(va); +} + +void defined_in_different_tu(int); + +void unknown_call(int fst, ...) { + // This function is very similar to noreturn_in_function() or + // conditional_in_function() but the definition of the called function is not + // visible for the analyzer. In theis situation the analyzer ignores the call + // and reports the va_arg() expression that unconditionally appears after it. + // This would be incorrect behavior only in a situation when the called + // function can return _and_ can be noreturn which should be fairly rare. + va_list va; + defined_in_different_tu(fst); + va_start(va, fst); + (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'unknown_call' always reach this va_arg() expression, so calling 'unknown_call' with no variadic arguments would be undefined behavior}} + va_end(va); +} diff --git a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn index 8869d432c5b65..25eb409aebd3f 100644 --- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn @@ -123,6 +123,7 @@ static_library("Checkers") { "TraversalChecker.cpp", "TrustNonnullChecker.cpp", "TrustReturnsNonnullChecker.cpp", + "UnconditionalVAArgChecker.cpp", "UndefBranchChecker.cpp", "UndefCapturedBlockVarChecker.cpp", "UndefResultChecker.cpp", From 9535a94c8e8c4de68964f85163a7cacd4f73a142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Mon, 12 Jan 2026 20:04:06 +0100 Subject: [PATCH 2/8] Remove superfluous semicolon --- clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp index dc8549d1ec960..b0ce8ae3cf379 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp @@ -40,7 +40,7 @@ using llvm::formatv; /// Either nullptr or a function under symbolic execution; a non-null value /// means that the analyzer didn't see any branching points from the beginning /// of that function until the current location. -REGISTER_TRAIT_WITH_PROGRAMSTATE(HasUnconditionalPath, const FunctionDecl *); +REGISTER_TRAIT_WITH_PROGRAMSTATE(HasUnconditionalPath, const FunctionDecl *) namespace { class UnconditionalVAArgChecker From 4de3336b9af26f0a2840a225448e4dd43627476f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Tue, 13 Jan 2026 17:16:04 +0100 Subject: [PATCH 3/8] Reword last paragraph of file doc comment --- .../StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp index b0ce8ae3cf379..154b7f52384db 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp @@ -18,9 +18,9 @@ // function is irrelevant; and the unconditional path within the function is // trivial and wouldn't have any note tags anyway.) // -// I also attempted to implement this checker in Clang Tidy, but the AST -// matching is simply not powerful enough to express this "no branching on the -// path" relationship -- I would have needed to reinvent the wheel. +// However, the AST matching framework of Clang Tidy is not powerful enough to +// express this "no branching on the execution path" relationship, at least not +// without reimplementing a crude and fragile form of symbolic execution. // //===----------------------------------------------------------------------===// From 7feefc63ab7b84342a7e9ad4711d09554efe301b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Tue, 13 Jan 2026 18:31:08 +0100 Subject: [PATCH 4/8] Improve comment in checkBranchCondition --- .../StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp index 154b7f52384db..6ad921e21c094 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp @@ -98,14 +98,14 @@ void UnconditionalVAArgChecker::checkEndFunction(const ReturnStmt *, void UnconditionalVAArgChecker::checkBranchCondition(const Stmt *Condition, CheckerContext &C) const { // After evaluating a branch condition, the analyzer (which examines - // execution paths individually) won't be able to conclude that the function - // _unconditionally_ uses va_arg() -- so this callback resets the field - // HasUnconditionalPath in the state. + // execution paths individually) won't be able to find a va_arg() expression + // that is _unconditionally_ reached -- so this callback resets the state + // trait HasUnconditionalPath. // NOTES: // 1. This is the right thing to do even if the analyzer sees that _in the // current state_ the execution can only continue in one direction. For // example, if the variadic function isn't the entrypont, then the parameters - // recieved from the caller may guarantee that va_arg() is uses -- but this + // recieved from the caller may guarantee that va_arg() is used -- but this // does not mean that the function _unconditionally_ uses va_arg(). // 2. After other kinds of state splits (e.g. EagerlyAssueme, callbacks of // StdLibraryFunctions separating different cases for the behavior of a From 5e1ca3182bd758544784549028b9df71d5a63dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Tue, 13 Jan 2026 18:32:04 +0100 Subject: [PATCH 5/8] Improve and clarify tests --- clang/test/Analysis/unconditional-va_arg.c | 64 +++++++++++++++------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/clang/test/Analysis/unconditional-va_arg.c b/clang/test/Analysis/unconditional-va_arg.c index 2f6e2875df021..1ab10742560e4 100644 --- a/clang/test/Analysis/unconditional-va_arg.c +++ b/clang/test/Analysis/unconditional-va_arg.c @@ -30,47 +30,53 @@ void log_message(const char *msg, ...) { va_end(va); } -void simple(int fst, ...) { - // This function is not called in this test file. + +void never_called(int fst, ...) { + // This simple test function is not called in this test file. va_list va; va_start(va, fst); (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} - // expected-note@-1 {{Calls to 'simple' always reach this va_arg() expression, so calling 'simple' with no variadic arguments would be undefined behavior}} + // expected-note@-1 {{Calls to 'never_called' always reach this va_arg() expression, so calling 'never_called' with no variadic arguments would be undefined behavior}} va_end(va); } -void used_with_varargs(int fst, ...) { - // This function is identical to simple(), but it is used and the call passes - // several variadic arguments. + +void called_with_varargs(int fst, ...) { + // This function is identical to never_called(), but it is called by another + // function (which will serve as the entrypoint) and the call passes several + // variadic arguments. The diagnostic is still raised. va_list va; va_start(va, fst); (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} - // expected-note@-1 {{Calls to 'used_with_varargs' always reach this va_arg() expression, so calling 'used_with_varargs' with no variadic arguments would be undefined behavior}} + // expected-note@-1 {{Calls to 'called_with_varargs' always reach this va_arg() expression, so calling 'called_with_varargs' with no variadic arguments would be undefined behavior}} va_end(va); } void caller1(void) { - used_with_varargs(1, 2, 3, 4); + called_with_varargs(1, 2, 3, 4); } -void used_without_varargs(int fst, ...) { - // This function is identical to simple(), but it is used and the call passes - // no variadic arguments. + +void called_without_varargs(int fst, ...) { + // This function is identical to never_called(), but it is called by another + // function (which will serve as the entrypoint) and the call passes no + // variadic arguments. The diagnostic is still raised. va_list va; va_start(va, fst); (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} - // expected-note@-1 {{Calls to 'used_without_varargs' always reach this va_arg() expression, so calling 'used_without_varargs' with no variadic arguments would be undefined behavior}} + // expected-note@-1 {{Calls to 'called_without_varargs' always reach this va_arg() expression, so calling 'called_without_varargs' with no variadic arguments would be undefined behavior}} // FIXME: Here the checker should mention that this function is _actually_ // called with no variadic arguments. va_end(va); } void caller2(void) { - used_without_varargs(1); + called_without_varargs(1); } + void multiple_va_arg_calls(int fst, ...) { - // This function is similar to simple() but calls va_arg() multiple times. To + // This function function unconditionally calls va_arg() multiple times. To // avoid spamming the user, only the first use is reported. va_list va; va_start(va, fst); @@ -94,6 +100,7 @@ void has_conditional_return(int fst, ...) { va_end(va); } + void has_conditional_logic(int fst, ...) { // As static analyzer only follows one execution path, it cannot see that // va_arg() is always executed in this function. Theoretically it would be @@ -107,9 +114,10 @@ void has_conditional_logic(int fst, ...) { va_end(va); } + void caller_has_conditional_logic(int fst, ...) { - // This function is identical to simple(), but it is used by a function that - // performs a state split before the call. + // This function is identical to never_called(), but it is used by a function + // that performs a state split before the call. // This test validates that state splits _before_ the variadic call (which // are irrelevant) do not influence the report creation. This is basically a // workaround for the fact that not all functions are entrypoints and before @@ -128,6 +136,20 @@ void caller_with_conditional_logic(int flag) { } +int has_shortcircuiting_operator(int fst, ...) { + // In addition to 'if' and similar statements, ternary operators and + // shortcircuiting logical operators can also ensure that the use of va_arg() + // is not unconditional. (These all behave identically in the BranchCondition + // callback, so this simple testcase is mostly for documentation purposes.) + va_list va; + int x; + va_start(va, fst); + x = fst || va_arg(va, int); // no-warning + va_end(va); + return x; +} + + void validate_argument(int fst) { if (fst <= 0) { printf("Negative first argument is illegal: %d!\n", fst); @@ -136,10 +158,10 @@ void validate_argument(int fst) { } void noreturn_in_function(int fst, ...) { - // This function is very similar to simple(), but calls a function which may - // call the noreturn function abort() before reaching the va_arg() call, so - // the checker doesn't produce a warning. This is important e.g. for cases - // when a function uses assertions to mark preconditions. + // This function calls a function which may call the noreturn function + // abort() before reaching the va_arg() call, so the checker doesn't produce + // a warning. This behavior is important e.g. for cases when a function uses + // assertions to mark preconditions. va_list va; validate_argument(fst); va_start(va, fst); @@ -147,6 +169,7 @@ void noreturn_in_function(int fst, ...) { va_end(va); } + void print_negative(int fst) { if (fst <= 0) { printf("First argument is negative: %d!\n", fst); @@ -165,6 +188,7 @@ void conditional_in_function(int fst, ...) { va_end(va); } + void defined_in_different_tu(int); void unknown_call(int fst, ...) { From 5e1e325ce96a86d2b8c8a816db6920ab27d3adc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Tue, 13 Jan 2026 19:00:30 +0100 Subject: [PATCH 6/8] Add testcase 'calls_other_variadic' --- .../Checkers/UnconditionalVAArgChecker.cpp | 3 +++ clang/test/Analysis/unconditional-va_arg.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp index 6ad921e21c094..3df085334a630 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp @@ -76,6 +76,9 @@ void UnconditionalVAArgChecker::checkBeginFunction(CheckerContext &C) const { // this situation is fairly rare and it is very unlikely that the "big" // outer function still has an unconditional path, there is no need to // write more complex logic that handles this. + // NOTE: Despite this, the checker can sometimes still report the + // unconditional va_arg() use in the outer function (probably because there + // is an alternative execution path that doesn't enter the inner call). C.addTransition(C.getState()->set<HasUnconditionalPath>(FD)); } } diff --git a/clang/test/Analysis/unconditional-va_arg.c b/clang/test/Analysis/unconditional-va_arg.c index 1ab10742560e4..81c39766ff28b 100644 --- a/clang/test/Analysis/unconditional-va_arg.c +++ b/clang/test/Analysis/unconditional-va_arg.c @@ -205,3 +205,22 @@ void unknown_call(int fst, ...) { // expected-note@-1 {{Calls to 'unknown_call' always reach this va_arg() expression, so calling 'unknown_call' with no variadic arguments would be undefined behavior}} va_end(va); } + + +void trivial_variadic(int fst, ...) { + // Do nothing. +} + +void calls_other_variadic(int fst, ...) { + // As the 'HasUnconditionalPath' can "remember" only one variadic function, + // I would expect that the presence of 'trivial_variadic' would prevent the + // checker from reporting the unconditional use of va_arg() in this function. + // However, for some unclear reason the checker is still able to produce this + // (true positive) report. + va_list va; + trivial_variadic(fst, 2); + va_start(va, fst); + (void)va_arg(va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'calls_other_variadic' always reach this va_arg() expression, so calling 'calls_other_variadic' with no variadic arguments would be undefined behavior}} + va_end(va); +} From f20c1b772753ddc908fc4baaee41c255fd639de4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Tue, 13 Jan 2026 19:36:43 +0100 Subject: [PATCH 7/8] Handle va_arg() occurring in other functions; add relevant tests --- .../Checkers/UnconditionalVAArgChecker.cpp | 17 ++++++--- clang/test/Analysis/unconditional-va_arg.c | 35 +++++++++++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp index 3df085334a630..e565b590b6e22 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp @@ -125,13 +125,13 @@ void UnconditionalVAArgChecker::checkBranchCondition(const Stmt *Condition, void UnconditionalVAArgChecker::checkPreStmt(const VAArgExpr *VAA, CheckerContext &C) const { ProgramStateRef State = C.getState(); - const FunctionDecl *FD = getCurrentFunction(C); - if (FD && FD == State->get<HasUnconditionalPath>()) { + const FunctionDecl *PathFrom = State->get<HasUnconditionalPath>();//getCurrentFunction(C); + if (PathFrom) { // Reset this field in the state to ensure that multiple consecutive // va_arg() calls don't produce repeated warnings. C.addTransition(State->set<HasUnconditionalPath>(nullptr)); - IdentifierInfo *II = FD->getIdentifier(); + IdentifierInfo *II = PathFrom->getIdentifier(); if (!II) return; StringRef FN = II->getName(); @@ -143,14 +143,21 @@ void UnconditionalVAArgChecker::checkPreStmt(const VAArgExpr *VAA, SourceRange SR = VAA->getSourceRange(); PathDiagnosticLocation PDL(SR.getBegin(), C.getASTContext().getSourceManager()); - // We create a non-path-sensitive report because the path wouldn't contain // any useful information: the path leading to this function is actively // ignored by the checker and the path within the function is trivial. auto R = std::make_unique<BasicBugReport>(BT, BT.getDescription(), FullMsg, PDL); + + if (getCurrentFunction(C) != PathFrom) { + SourceRange DefSR = PathFrom->getSourceRange(); + PathDiagnosticLocation DefPDL(DefSR.getBegin(), + C.getASTContext().getSourceManager()); + std::string NoteMsg = formatv("Variadic function '{0}' is defined here", FN); + R->addNote(NoteMsg, DefPDL, DefSR); + } R->addRange(SR); - R->setDeclWithIssue(FD); + R->setDeclWithIssue(PathFrom); C.emitReport(std::move(R)); } } diff --git a/clang/test/Analysis/unconditional-va_arg.c b/clang/test/Analysis/unconditional-va_arg.c index 81c39766ff28b..3bbc30e85adb8 100644 --- a/clang/test/Analysis/unconditional-va_arg.c +++ b/clang/test/Analysis/unconditional-va_arg.c @@ -224,3 +224,38 @@ void calls_other_variadic(int fst, ...) { // expected-note@-1 {{Calls to 'calls_other_variadic' always reach this va_arg() expression, so calling 'calls_other_variadic' with no variadic arguments would be undefined behavior}} va_end(va); } + +int get_int_from_va_list(va_list *va) { + // This checker can report va_arg() expressions that are unconditionally + // reached from a variadic function, even if they are located in a different + // function. In this case the originating variadic function is marked with a + // note (to make it easier to find). + return va_arg(*va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'uses_wrapped_va_arg' always reach this va_arg() expression, so calling 'uses_wrapped_va_arg' with no variadic arguments would be undefined behavior}} +} + +void uses_wrapped_va_arg(int fst, ...) { + // expected-note@-1 {{Variadic function 'uses_wrapped_va_arg' is defined here}} + va_list va; + va_start(va, fst); + (void)get_int_from_va_list(&va); + va_end(va); +} + +int standalone_non_variadic(va_list *va) { + // A va_arg() expression is only reported if it is unconditionally reached + // from the beginning of a _variadic_ function. Functions that use va_arg() + // on a va_list obtained from other sources (e.g. as an argument) are + // presumed to be small helper subroutines of a complex variadic function, so + // we do not report cases where the va_arg() is unconditionally reached from + // the beginning of a _non-variadic_ function. + return va_arg(*va, int); //no-warning +} + +int variadic_but_also_takes_va_list(va_list *va, ...) { + // The checker just looks for va_arg() calls without checking the origin of + // their argument, so in this (very artifical) example it produces a + // result that is arguably a false positive. + return va_arg(*va, int); // expected-warning{{Unconditional use of va_arg()}} + // expected-note@-1 {{Calls to 'variadic_but_also_takes_va_list' always reach this va_arg() expression, so calling 'variadic_but_also_takes_va_list' with no variadic arguments would be undefined behavior}} +} From b7b39af05acbcf15c5822a80ad5a0bfa2afed8d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <[email protected]> Date: Tue, 13 Jan 2026 19:37:45 +0100 Subject: [PATCH 8/8] Satisfy git-clang-format --- .../StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp index e565b590b6e22..f0f5ef4e21966 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnconditionalVAArgChecker.cpp @@ -125,7 +125,8 @@ void UnconditionalVAArgChecker::checkBranchCondition(const Stmt *Condition, void UnconditionalVAArgChecker::checkPreStmt(const VAArgExpr *VAA, CheckerContext &C) const { ProgramStateRef State = C.getState(); - const FunctionDecl *PathFrom = State->get<HasUnconditionalPath>();//getCurrentFunction(C); + const FunctionDecl *PathFrom = + State->get<HasUnconditionalPath>(); // getCurrentFunction(C); if (PathFrom) { // Reset this field in the state to ensure that multiple consecutive // va_arg() calls don't produce repeated warnings. @@ -152,8 +153,9 @@ void UnconditionalVAArgChecker::checkPreStmt(const VAArgExpr *VAA, if (getCurrentFunction(C) != PathFrom) { SourceRange DefSR = PathFrom->getSourceRange(); PathDiagnosticLocation DefPDL(DefSR.getBegin(), - C.getASTContext().getSourceManager()); - std::string NoteMsg = formatv("Variadic function '{0}' is defined here", FN); + C.getASTContext().getSourceManager()); + std::string NoteMsg = + formatv("Variadic function '{0}' is defined here", FN); R->addNote(NoteMsg, DefPDL, DefSR); } R->addRange(SR); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
