https://github.com/kadircet updated https://github.com/llvm/llvm-project/pull/110091
From 08e2a37c8b028efcacd5ee8a269aed837ba16698 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya <kadir...@google.com> Date: Thu, 26 Sep 2024 10:55:49 +0200 Subject: [PATCH] [clangd] Improve filtering logic for undesired proto symbols This used to filter any names with `_` in them, apart from enum-constants. Resulting in discrepancies in behavior when we had fields that have `_` in the name, or for accessors like `set_`, `has_`. The logic seems to be trying to filter mangled names for nested entries, so adjusted logic to only do so for top-level decls, while still preserving some public top-level helpers. Heuristics are still leaning towards false-negatives, e.g. if a top-level entity has `_` in its name (`message Foo_Bar {}`), it'll be filtered, or an enum that prefixes its type name to constants (`enum Foo { Foo_OK }`). --- .../clangd/index/SymbolCollector.cpp | 68 +++++++++++++++---- .../clangd/unittests/SymbolCollectorTests.cpp | 64 ++++++++++++++--- 2 files changed, 110 insertions(+), 22 deletions(-) diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index a76894cf0855f3..60739032eab003 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -41,6 +41,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -75,18 +76,61 @@ bool isPrivateProtoDecl(const NamedDecl &ND) { if (ND.getIdentifier() == nullptr) return false; auto Name = ND.getIdentifier()->getName(); - if (!Name.contains('_')) - return false; - // Nested proto entities (e.g. Message::Nested) have top-level decls - // that shouldn't be used (Message_Nested). Ignore them completely. - // The nested entities are dangling type aliases, we may want to reconsider - // including them in the future. - // For enum constants, SOME_ENUM_CONSTANT is not private and should be - // indexed. Outer_INNER is private. This heuristic relies on naming style, it - // will include OUTER_INNER and exclude some_enum_constant. - // FIXME: the heuristic relies on naming style (i.e. no underscore in - // user-defined names) and can be improved. - return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower); + // There are some internal helpers like _internal_set_foo(); + if (Name.contains("_internal_")) + return true; + + // https://protobuf.dev/reference/cpp/cpp-generated/#nested-types + // Nested entities (messages/enums) has two names, one at the top-level scope, + // with a mangled name created by prepending all the outer types. These names + // are almost never preferred by the developers, so exclude them from index. + // e.g. + // message Foo { + // message Bar {} + // enum E { A } + // } + // yields: + // class Foo_Bar {}; + // enum Foo_E { Foo_E_A }; + // class Foo { + // using Bar = Foo_Bar; + // static constexpr Foo_E A = Foo_E_A; + // }; + + // We get rid of Foo_Bar and Foo_E by discarding any top-level entries with + // `_` in the name. This relies on original message/enum not having `_` in the + // name. Hence might go wrong in certain cases. + if (ND.getDeclContext()->isNamespace()) { + // Strip off some known public suffix helpers for enums, rest of the helpers + // are generated inside record decls so we don't care. + // https://protobuf.dev/reference/cpp/cpp-generated/#enum + Name.consume_back("_descriptor"); + Name.consume_back("_IsValid"); + Name.consume_back("_Name"); + Name.consume_back("_Parse"); + Name.consume_back("_MIN"); + Name.consume_back("_MAX"); + Name.consume_back("_ARRAYSIZE"); + return Name.contains('_'); + } + + // EnumConstantDecls need some special attention, despite being nested in a + // TagDecl, they might still have mangled names. We filter those by checking + // if it has parent's name as a prefix. + // This might go wrong if a nested entity has a name that starts with parent's + // name, e.g: enum Foo { Foo_X }. + if (llvm::isa<EnumConstantDecl>(&ND)) { + auto *DC = llvm::cast<EnumDecl>(ND.getDeclContext()); + if (!DC || !DC->getIdentifier()) + return false; + auto CtxName = DC->getIdentifier()->getName(); + return !CtxName.empty() && Name.consume_front(CtxName) && + Name.consume_front("_"); + } + + // Now we're only left with fields/methods without an `_internal_` in the + // name, they're intended for public use. + return false; } // We only collect #include paths for symbols that are suitable for global code diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 0666be95b6b9ee..e8088cb37fa51c 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -201,19 +201,63 @@ TEST_F(ShouldCollectSymbolTest, NoPrivateProtoSymbol) { build( R"(// Generated by the protocol buffer compiler. DO NOT EDIT! namespace nx { - class Top_Level {}; - class TopLevel {}; - enum Kind { - KIND_OK, - Kind_Not_Ok, + enum Outer_Enum : int { + Outer_Enum_KIND1, + Outer_Enum_Kind_2, }; + bool Outer_Enum_IsValid(int); + + class Outer_Inner {}; + class Outer { + using Inner = Outer_Inner; + using Enum = Outer_Enum; + static constexpr Enum KIND1 = Outer_Enum_KIND1; + static constexpr Enum Kind_2 = Outer_Enum_Kind_2; + static bool Enum_IsValid(int); + int &x(); + void set_x(); + void _internal_set_x(); + + int &Outer_y(); + }; + enum Foo { + FOO_VAL1, + Foo_VAL2, + }; + bool Foo_IsValid(int); })"); - EXPECT_TRUE(shouldCollect("nx::TopLevel")); - EXPECT_TRUE(shouldCollect("nx::Kind::KIND_OK")); - EXPECT_TRUE(shouldCollect("nx::Kind")); - EXPECT_FALSE(shouldCollect("nx::Top_Level")); - EXPECT_FALSE(shouldCollect("nx::Kind::Kind_Not_Ok")); + // Make sure all the mangled names for Outer::Enum is discarded. + EXPECT_FALSE(shouldCollect("nx::Outer_Enum")); + EXPECT_FALSE(shouldCollect("nx::Outer_Enum_KIND1")); + EXPECT_FALSE(shouldCollect("nx::Outer_Enum_Kind_2")); + EXPECT_FALSE(shouldCollect("nx::Outer_Enum_IsValid")); + // But nested aliases are preserved. + EXPECT_TRUE(shouldCollect("nx::Outer::Enum")); + EXPECT_TRUE(shouldCollect("nx::Outer::KIND1")); + EXPECT_TRUE(shouldCollect("nx::Outer::Kind_2")); + EXPECT_TRUE(shouldCollect("nx::Outer::Enum_IsValid")); + + // Check for Outer::Inner. + EXPECT_FALSE(shouldCollect("nx::Outer_Inner")); + EXPECT_TRUE(shouldCollect("nx::Outer")); + EXPECT_TRUE(shouldCollect("nx::Outer::Inner")); + + // Make sure field related information is preserved, unless it's explicitly + // marked with `_internal_`. + EXPECT_TRUE(shouldCollect("nx::Outer::x")); + EXPECT_TRUE(shouldCollect("nx::Outer::set_x")); + EXPECT_FALSE(shouldCollect("nx::Outer::_internal_set_x")); + EXPECT_TRUE(shouldCollect("nx::Outer::Outer_y")); + + // Handling of a top-level enum + EXPECT_TRUE(shouldCollect("nx::Foo::FOO_VAL1")); + EXPECT_TRUE(shouldCollect("nx::FOO_VAL1")); + EXPECT_TRUE(shouldCollect("nx::Foo_IsValid")); + // Our heuristic goes wrong here, if the user has a nested name that starts + // with parent's name. + EXPECT_FALSE(shouldCollect("nx::Foo::Foo_VAL2")); + EXPECT_FALSE(shouldCollect("nx::Foo_VAL2")); } TEST_F(ShouldCollectSymbolTest, DoubleCheckProtoHeaderComment) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits