Author: Richard Smith
Date: 2025-07-01T13:31:46-07:00
New Revision: c56c349d39464d859b0f8655a4502e5c06924b66

URL: 
https://github.com/llvm/llvm-project/commit/c56c349d39464d859b0f8655a4502e5c06924b66
DIFF: 
https://github.com/llvm/llvm-project/commit/c56c349d39464d859b0f8655a4502e5c06924b66.diff

LOG: [clang-tidy] Switch misc-confusable-identifiers check to a faster 
algorithm. (#130369)

Optimizations:

- Only build the skeleton for each identifier once, rather than once for
each declaration of that identifier.
- Only compute the contexts in which identifiers are declared for
identifiers that have the same skeleton as another identifier in the
translation unit.
- Only compare pairs of declarations that are declared in related
contexts, rather than comparing all pairs of declarations with the same
skeleton.

Also simplify by removing the caching of enclosing `DeclContext` sets,
because with the above changes we don't even compute the enclosing
`DeclContext` sets in common cases. Instead, we terminate the traversal
to enclosing `DeclContext`s immediately if we've already found another
declaration in that context with the same identifier. (This optimization
is not currently applied to the `forallBases` traversal, but could be
applied there too if needed.)

This also fixes two bugs that together caused the check to fail to find
some of the issues it was looking for:

- The old check skipped comparisons of declarations from different
contexts unless both declarations were type template parameters. This
caused the checker to not warn on some instances of the CVE it is
intended to detect.
- The old check skipped comparisons of declarations in all base classes
other than the first one found by the traversal. This appears to be an
oversight, incorrectly returning `false` rather than `true` from the
`forallBases` callback, which terminates traversal.

This also fixes an issue where the check would have false positives for
template parameters and function parameters in some cases, because those
parameters sometimes have a parent `DeclContext` that is the parent of
the parameterized entity, or sometimes is the translation unit. In
either case, this would cause warnings about declarations that are never
visible together in any scope.

This decreases the runtime of this check, especially in the common case
where there are few or no skeletons with two or more different
identifiers. Running this check over LLVM, clang, and clang-tidy, the
wall time for the check as reported by clang-tidy's internal profiler is
reduced from 5202.86s to 3900.90s.

Added: 
    

Modified: 
    clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp
    clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
    clang-tools-extra/test/clang-tidy/checkers/misc/confusable-identifiers.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp 
b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp
index 7639476980529..79ae5ee98182b 100644
--- a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp
@@ -1,5 +1,4 @@
-//===--- ConfusableIdentifierCheck.cpp -
-// clang-tidy--------------------------===//
+//===--- ConfusableIdentifierCheck.cpp - clang-tidy 
-----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,6 +8,7 @@
 
 #include "ConfusableIdentifierCheck.h"
 
+#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/ConvertUTF.h"
@@ -88,99 +88,98 @@ static llvm::SmallString<64U> skeleton(StringRef Name) {
   return Skeleton;
 }
 
-static bool mayShadowImpl(const DeclContext *DC0, const DeclContext *DC1) {
-  return DC0 && DC0 == DC1;
-}
-
-static bool mayShadowImpl(const NamedDecl *ND0, const NamedDecl *ND1) {
-  return isa<TemplateTypeParmDecl>(ND0) || isa<TemplateTypeParmDecl>(ND1);
-}
-
-static bool isMemberOf(const ConfusableIdentifierCheck::ContextInfo *DC0,
-                       const ConfusableIdentifierCheck::ContextInfo *DC1) {
-  return llvm::is_contained(DC1->Bases, DC0->PrimaryContext);
-}
-
-static bool enclosesContext(const ConfusableIdentifierCheck::ContextInfo *DC0,
-                            const ConfusableIdentifierCheck::ContextInfo *DC1) 
{
-  if (DC0->PrimaryContext == DC1->PrimaryContext)
-    return true;
-
-  return llvm::is_contained(DC0->PrimaryContexts, DC1->PrimaryContext) ||
-         llvm::is_contained(DC1->PrimaryContexts, DC0->PrimaryContext);
-}
-
-static bool mayShadow(const NamedDecl *ND0,
-                      const ConfusableIdentifierCheck::ContextInfo *DC0,
-                      const NamedDecl *ND1,
-                      const ConfusableIdentifierCheck::ContextInfo *DC1) {
-
-  if (!DC0->Bases.empty() && !DC1->Bases.empty()) {
-    // if any of the declaration is a non-private member of the other
-    // declaration, it's shadowed by the former
-
-    if (ND1->getAccess() != AS_private && isMemberOf(DC1, DC0))
-      return true;
+namespace {
+struct Entry {
+  const NamedDecl *ND;
+  const Decl *Parent;
+  bool FromDerivedClass;
+};
+} // namespace
 
-    if (ND0->getAccess() != AS_private && isMemberOf(DC0, DC1))
+// Map from a context to the declarations in that context with the current
+// skeleton. At most one entry per distinct identifier is tracked. The
+// context is usually a `DeclContext`, but can also be a template declaration
+// that has no corresponding context, such as an alias template or variable
+// template.
+using DeclsWithinContextMap =
+    llvm::DenseMap<const Decl *, llvm::SmallVector<Entry, 1>>;
+
+static bool addToContext(DeclsWithinContextMap &DeclsWithinContext,
+                         const Decl *Context, Entry E) {
+  auto &Decls = DeclsWithinContext[Context];
+  if (!Decls.empty() &&
+      Decls.back().ND->getIdentifier() == E.ND->getIdentifier()) {
+    // Already have a declaration with this identifier in this context. Don't
+    // track another one. This means that if an outer name is confusable with 
an
+    // inner name, we'll only diagnose the outer name once, pointing at the
+    // first inner declaration with that name.
+    if (Decls.back().FromDerivedClass && !E.FromDerivedClass) {
+      // Prefer the declaration that's not from the derived class, because that
+      // conflicts with more declarations.
+      Decls.back() = E;
       return true;
-  }
-
-  if (!mayShadowImpl(DC0->NonTransparentContext, DC1->NonTransparentContext) &&
-      !mayShadowImpl(ND0, ND1))
+    }
     return false;
-
-  return enclosesContext(DC0, DC1);
+  }
+  Decls.push_back(E);
+  return true;
 }
 
-const ConfusableIdentifierCheck::ContextInfo *
-ConfusableIdentifierCheck::getContextInfo(const DeclContext *DC) {
-  const DeclContext *PrimaryContext = DC->getPrimaryContext();
-  auto [It, Inserted] = ContextInfos.try_emplace(PrimaryContext);
-  if (!Inserted)
-    return &It->second;
-
-  ContextInfo &Info = It->second;
-  Info.PrimaryContext = PrimaryContext;
-  Info.NonTransparentContext = PrimaryContext;
+static void addToEnclosingContexts(DeclsWithinContextMap &DeclsWithinContext,
+                                   const Decl *Parent, const NamedDecl *ND) {
+  const Decl *Outer = Parent;
+  while (Outer) {
+    if (const auto *NS = dyn_cast<NamespaceDecl>(Outer))
+      Outer = NS->getCanonicalDecl();
+
+    if (!addToContext(DeclsWithinContext, Outer, {ND, Parent, false}))
+      return;
+
+    if (const auto *RD = dyn_cast<CXXRecordDecl>(Outer)) {
+      RD = RD->getDefinition();
+      if (RD) {
+        RD->forallBases([&](const CXXRecordDecl *Base) {
+          addToContext(DeclsWithinContext, Base, {ND, Parent, true});
+          return true;
+        });
+      }
+    }
 
-  while (Info.NonTransparentContext->isTransparentContext()) {
-    Info.NonTransparentContext = Info.NonTransparentContext->getParent();
-    if (!Info.NonTransparentContext)
+    auto *OuterDC = Outer->getDeclContext();
+    if (!OuterDC)
       break;
+    Outer = cast_or_null<Decl>(OuterDC->getNonTransparentContext());
   }
+}
+
+void ConfusableIdentifierCheck::check(
+    const ast_matchers::MatchFinder::MatchResult &Result) {
+  const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
+  if (!ND)
+    return;
 
-  if (Info.NonTransparentContext)
-    Info.NonTransparentContext =
-        Info.NonTransparentContext->getPrimaryContext();
+  addDeclToCheck(ND,
+                 cast<Decl>(ND->getDeclContext()->getNonTransparentContext()));
 
-  while (DC) {
-    if (!isa<LinkageSpecDecl>(DC) && !isa<ExportDecl>(DC))
-      Info.PrimaryContexts.push_back(DC->getPrimaryContext());
-    DC = DC->getParent();
+  // Associate template parameters with this declaration of this template.
+  if (const auto *TD = dyn_cast<TemplateDecl>(ND)) {
+    for (const NamedDecl *Param : *TD->getTemplateParameters())
+      addDeclToCheck(Param, TD->getTemplatedDecl());
   }
 
-  if (const auto *RD = dyn_cast<CXXRecordDecl>(PrimaryContext)) {
-    RD = RD->getDefinition();
-    if (RD) {
-      Info.Bases.push_back(RD);
-      RD->forallBases([&](const CXXRecordDecl *Base) {
-        Info.Bases.push_back(Base);
-        return false;
-      });
-    }
+  // Associate function parameters with this declaration of this function.
+  if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
+    for (const NamedDecl *Param : FD->parameters())
+      addDeclToCheck(Param, ND);
   }
-
-  return &Info;
 }
 
-void ConfusableIdentifierCheck::check(
-    const ast_matchers::MatchFinder::MatchResult &Result) {
-  const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
-  if (!ND)
+void ConfusableIdentifierCheck::addDeclToCheck(const NamedDecl *ND,
+                                               const Decl *Parent) {
+  if (!ND || !Parent)
     return;
 
-  IdentifierInfo *NDII = ND->getIdentifier();
+  const IdentifierInfo *NDII = ND->getIdentifier();
   if (!NDII)
     return;
 
@@ -188,34 +187,78 @@ void ConfusableIdentifierCheck::check(
   if (NDName.empty())
     return;
 
-  const ContextInfo *Info = getContextInfo(ND->getDeclContext());
+  NameToDecls[NDII].push_back({ND, Parent});
+}
+
+void ConfusableIdentifierCheck::onEndOfTranslationUnit() {
+  llvm::StringMap<llvm::SmallVector<const IdentifierInfo *, 1>> 
SkeletonToNames;
+  // Compute the skeleton for each identifier.
+  for (auto &[Ident, Decls] : NameToDecls) {
+    SkeletonToNames[skeleton(Ident->getName())].push_back(Ident);
+  }
 
-  llvm::SmallVector<Entry> &Mapped = Mapper[skeleton(NDName)];
-  for (const Entry &E : Mapped) {
-    if (!mayShadow(ND, Info, E.Declaration, E.Info))
+  // Visit each skeleton with more than one identifier.
+  for (auto &[Skel, Idents] : SkeletonToNames) {
+    if (Idents.size() < 2) {
       continue;
+    }
 
-    const IdentifierInfo *ONDII = E.Declaration->getIdentifier();
-    StringRef ONDName = ONDII->getName();
-    if (ONDName == NDName)
-      continue;
+    // Find the declaration contexts that transitively contain each identifier.
+    DeclsWithinContextMap DeclsWithinContext;
+    for (const IdentifierInfo *II : Idents) {
+      for (auto [ND, Parent] : NameToDecls[II]) {
+        addToEnclosingContexts(DeclsWithinContext, Parent, ND);
+      }
+    }
 
-    diag(ND->getLocation(), "%0 is confusable with %1") << ND << E.Declaration;
-    diag(E.Declaration->getLocation(), "other declaration found here",
-         DiagnosticIDs::Note);
+    // Check to see if any declaration is declared in a context that
+    // transitively contains another declaration with a 
diff erent identifier but
+    // the same skeleton.
+    for (const IdentifierInfo *II : Idents) {
+      for (auto [OuterND, OuterParent] : NameToDecls[II]) {
+        for (Entry Inner : DeclsWithinContext[OuterParent]) {
+          // Don't complain if the identifiers are the same.
+          if (OuterND->getIdentifier() == Inner.ND->getIdentifier())
+            continue;
+
+          // Don't complain about a derived-class name shadowing a base class
+          // private member.
+          if (OuterND->getAccess() == AS_private && Inner.FromDerivedClass)
+            continue;
+
+          // If the declarations are in the same context, only diagnose the
+          // later one.
+          if (OuterParent == Inner.Parent &&
+              Inner.ND->getASTContext()
+                  .getSourceManager()
+                  .isBeforeInTranslationUnit(Inner.ND->getLocation(),
+                                             OuterND->getLocation()))
+            continue;
+
+          diag(Inner.ND->getLocation(), "%0 is confusable with %1")
+              << Inner.ND << OuterND;
+          diag(OuterND->getLocation(), "other declaration found here",
+               DiagnosticIDs::Note);
+        }
+      }
+    }
   }
 
-  Mapped.push_back({ND, Info});
-}
-
-void ConfusableIdentifierCheck::onEndOfTranslationUnit() {
-  Mapper.clear();
-  ContextInfos.clear();
+  NameToDecls.clear();
 }
 
 void ConfusableIdentifierCheck::registerMatchers(
     ast_matchers::MatchFinder *Finder) {
-  Finder->addMatcher(ast_matchers::namedDecl().bind("nameddecl"), this);
+  // Parameter declarations sometimes use the translation unit or some outer
+  // enclosing context as their `DeclContext`, instead of their parent, so
+  // we handle them specially in `check`.
+  auto AnyParamDecl = ast_matchers::anyOf(
+      ast_matchers::parmVarDecl(), ast_matchers::templateTypeParmDecl(),
+      ast_matchers::nonTypeTemplateParmDecl(),
+      ast_matchers::templateTemplateParmDecl());
+  
Finder->addMatcher(ast_matchers::namedDecl(ast_matchers::unless(AnyParamDecl))
+                         .bind("nameddecl"),
+                     this);
 }
 
 } // namespace clang::tidy::misc

diff  --git a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h 
b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
index f3b0c8ed00306..9cce6cce67682 100644
--- a/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.h
@@ -1,5 +1,4 @@
-//===--- ConfusableIdentifierCheck.h - clang-tidy
-//-------------------------------*- C++ -*-===//
+//===--- ConfusableIdentifierCheck.h - clang-tidy ---------------*- C++ 
-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,7 +10,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_CONFUSABLE_IDENTIFIER_CHECK_H
 
 #include "../ClangTidyCheck.h"
-#include <unordered_map>
+#include "llvm/ADT/DenseMap.h"
 
 namespace clang::tidy::misc {
 
@@ -31,23 +30,13 @@ class ConfusableIdentifierCheck : public ClangTidyCheck {
     return TK_IgnoreUnlessSpelledInSource;
   }
 
-  struct ContextInfo {
-    const DeclContext *PrimaryContext;
-    const DeclContext *NonTransparentContext;
-    llvm::SmallVector<const DeclContext *> PrimaryContexts;
-    llvm::SmallVector<const CXXRecordDecl *> Bases;
-  };
-
 private:
-  struct Entry {
-    const NamedDecl *Declaration;
-    const ContextInfo *Info;
-  };
-
-  const ContextInfo *getContextInfo(const DeclContext *DC);
+  void addDeclToCheck(const NamedDecl *ND, const Decl *Parent);
 
-  llvm::StringMap<llvm::SmallVector<Entry>> Mapper;
-  std::unordered_map<const DeclContext *, ContextInfo> ContextInfos;
+  llvm::DenseMap<
+      const IdentifierInfo *,
+      llvm::SmallVector<std::pair<const NamedDecl *, const Decl *>, 1>>
+      NameToDecls;
 };
 
 } // namespace clang::tidy::misc

diff  --git 
a/clang-tools-extra/test/clang-tidy/checkers/misc/confusable-identifiers.cpp 
b/clang-tools-extra/test/clang-tidy/checkers/misc/confusable-identifiers.cpp
index cdfed7edb431d..fb7eeed7e715f 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/confusable-identifiers.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/confusable-identifiers.cpp
@@ -74,6 +74,47 @@ template <typename t1, typename tl>
 // CHECK-MESSAGES: :[[#@LINE-2]]:20: note: other declaration found here
 void f9();
 
+namespace 
diff erent_contexts {
+  // No warning for names in unrelated contexts.
+  template <typename u1> void 
diff erent_templates_1();
+  template <typename ul> void 
diff erent_templates_2();
+  namespace inner {
+    int ul;
+  }
+}
+
+namespace same_template {
+  template <typename u1, typename ul> using T = int;
+  // CHECK-MESSAGES: :[[#@LINE-1]]:35: warning: 'ul' is confusable with 'u1' 
[misc-confusable-identifiers]
+  // CHECK-MESSAGES: :[[#@LINE-2]]:22: note: other declaration found here
+
+  template <typename v1, typename vl> int n;
+  // CHECK-MESSAGES: :[[#@LINE-1]]:35: warning: 'vl' is confusable with 'v1' 
[misc-confusable-identifiers]
+  // CHECK-MESSAGES: :[[#@LINE-2]]:22: note: other declaration found here
+
+  template <typename w1> int wl;
+  // CHECK-MESSAGES: :[[#@LINE-1]]:22: warning: 'w1' is confusable with 'wl' 
[misc-confusable-identifiers]
+  // CHECK-MESSAGES: :[[#@LINE-2]]:30: note: other declaration found here
+
+  int xl;
+  template <typename x1> int x;
+  // CHECK-MESSAGES: :[[#@LINE-1]]:22: warning: 'x1' is confusable with 'xl' 
[misc-confusable-identifiers]
+  // CHECK-MESSAGES: :[[#@LINE-3]]:7: note: other declaration found here
+}
+
+namespace f10 {
+int il;
+namespace inner {
+  int i1;
+  // CHECK-MESSAGES: :[[#@LINE-1]]:7: warning: 'i1' is confusable with 'il' 
[misc-confusable-identifiers]
+  // CHECK-MESSAGES: :[[#@LINE-4]]:5: note: other declaration found here
+  int j1;
+  // CHECK-MESSAGES: :[[#@LINE-1]]:7: warning: 'j1' is confusable with 'jl' 
[misc-confusable-identifiers]
+  // CHECK-MESSAGES: :[[#@LINE+2]]:5: note: other declaration found here
+}
+int jl;
+}
+
 struct Base0 {
   virtual void mO0();
 
@@ -103,3 +144,21 @@ struct Derived1 : Base1 {
 
   long mI1(); // no warning: mII is private
 };
+
+struct Base2 {
+  long nO0;
+
+private:
+  long nII;
+};
+
+struct Mid2 : Base0, Base1, Base2 {
+};
+
+struct Derived2 : Mid2 {
+  long nOO;
+  // CHECK-MESSAGES: :[[#@LINE-1]]:8: warning: 'nOO' is confusable with 'nO0' 
[misc-confusable-identifiers]
+  // CHECK-MESSAGES: :[[#@LINE-12]]:8: note: other declaration found here
+
+  long nI1(); // no warning: mII is private
+};


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to