https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934
>From e99bb267a950f75ea4fc23454762a7422c776bca Mon Sep 17 00:00:00 2001 From: higher-performance <higher.performance.git...@gmail.com> Date: Wed, 5 Mar 2025 15:49:06 -0500 Subject: [PATCH] Reduce memory usage in AST parent map generation by partially reverting quadratic slowdown mitigation The use of parent maps (hasParent(), hasAncestor(), etc.) in Clang AST matchers is no longer guaranteed to avoid quadratic slowdown, but will only do so in pathological cases. --- clang/lib/AST/ParentMapContext.cpp | 50 ++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp index e9387ec79c373..b7156b892b01a 100644 --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -60,6 +60,29 @@ class ParentMapContext::ParentMap { template <typename, typename...> friend struct ::MatchParents; + template <class T> struct IndirectDenseMapInfo { + using Ptr = T *; + using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>; + static inline Ptr getEmptyKey() { + return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey()); + } + static inline Ptr getTombstoneKey() { + return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey()); + } + static unsigned getHashValue(Ptr Val) { + return Val == getEmptyKey() || Val == getTombstoneKey() + ? 0 + : Base::getHashValue(*Val); + } + static bool isEqual(Ptr LHS, Ptr RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) { + return LHS == RHS; + } + return Base::isEqual(*LHS, *RHS); + } + }; + /// Contains parents of a node. class ParentVector { public: @@ -70,16 +93,37 @@ class ParentMapContext::ParentMap { push_back(Value); } bool contains(const DynTypedNode &Value) { - return Seen.contains(Value); + assert(Value.getMemoizationData()); + bool found = FragileLazySeenCache.contains(&Value); + while (!found && ItemsProcessed < Items.size()) { + found |= FragileLazySeenCache.insert(&Items[ItemsProcessed]).second; + ++ItemsProcessed; + } + return found; } void push_back(const DynTypedNode &Value) { - if (!Value.getMemoizationData() || Seen.insert(Value).second) + if (!Value.getMemoizationData() || !contains(Value)) { + const size_t OldCapacity = Items.capacity(); Items.push_back(Value); + if (OldCapacity != Items.capacity()) { + // Pointers are invalidated; remove them. + ItemsProcessed = 0; + // Free memory to avoid doubling peak memory usage during rehashing + FragileLazySeenCache.clear(); + } + } } llvm::ArrayRef<DynTypedNode> view() const { return Items; } private: + // BE CAREFUL. Pointers into this container are stored in the cache. llvm::SmallVector<DynTypedNode, 2> Items; - llvm::SmallDenseSet<DynTypedNode, 2> Seen; + // This cache is fragile because it contains pointers that are invalidated + // when the vector capacity changes. + llvm::SmallDenseSet<const DynTypedNode *, 2, + IndirectDenseMapInfo<const DynTypedNode>> + FragileLazySeenCache; + // Lazily tracks which items have been processed for the cache. + size_t ItemsProcessed = 0; }; /// Maps from a node to its parents. This is used for nodes that have _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits