Author: Timm Baeder Date: 2025-08-18T17:15:31+02:00 New Revision: 6ce13ae1c20515e7c4554cde028e3a0990786075
URL: https://github.com/llvm/llvm-project/commit/6ce13ae1c20515e7c4554cde028e3a0990786075 DIFF: https://github.com/llvm/llvm-project/commit/6ce13ae1c20515e7c4554cde028e3a0990786075.diff LOG: [clang][bytecode] Always track item types in InterpStack (#151088) This has been a long-standing problem, but we didn't use to call the destructors of items on the stack unless we explicitly `pop()` or `discard()` them. When interpretation was interrupted midway-through (because something failed), we left `Pointer`s on the stack. Since all `Block`s track what `Pointer`s point to them (via a doubly-linked list in the `Pointer`), that meant we potentially leave deallocated pointers in that list. We used to work around this by removing the `Pointer` from the list before deallocating the block. However, we now want to track pointers to global blocks as well, which poses a problem since the blocks are never deallocated and thus those pointers are always left dangling. I've tried a few different approaches to fixing this but in the end I just gave up on the idea of never knowing what items are in the stack. We already have an `ItemTypes` vector that we use for debugging assertions. This patch simply enables this vector unconditionally and uses it in the abort case to properly `discard()` all elements from the stack. That's a little sad IMO but I don't know of another way of solving this problem. As expected, this is a slight hit to compile times: https://llvm-compile-time-tracker.com/compare.php?from=574d0a92060bf4808776b7a0239ffe91a092b15d&to=0317105f559093cfb909bfb01857a6b837991940&stat=instructions:u Added: Modified: clang/lib/AST/ByteCode/Compiler.cpp clang/lib/AST/ByteCode/Context.cpp clang/lib/AST/ByteCode/Context.h clang/lib/AST/ByteCode/Descriptor.h clang/lib/AST/ByteCode/Function.h clang/lib/AST/ByteCode/InterpBlock.cpp clang/lib/AST/ByteCode/InterpBlock.h clang/lib/AST/ByteCode/InterpStack.cpp clang/lib/AST/ByteCode/InterpStack.h clang/lib/AST/ByteCode/InterpState.cpp clang/lib/AST/ByteCode/Pointer.h clang/lib/AST/ByteCode/PrimType.h clang/lib/AST/ByteCode/Program.h clang/test/AST/ByteCode/c.c Removed: ################################################################################ diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 5c416474d3bcf..f2ce69a62838e 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -2980,20 +2980,25 @@ bool Compiler<Emitter>::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { if (T && !E->isLValue()) return this->delegate(Init); - if (std::optional<unsigned> GlobalIndex = P.createGlobal(E)) { - if (!this->emitGetPtrGlobal(*GlobalIndex, E)) - return false; + std::optional<unsigned> GlobalIndex = P.createGlobal(E); + if (!GlobalIndex) + return false; - if (T) { - if (!this->visit(Init)) - return false; - return this->emitInitGlobal(*T, *GlobalIndex, E); - } + if (!this->emitGetPtrGlobal(*GlobalIndex, E)) + return false; + + // Since this is a global variable, we might've already seen, + // don't do it again. + if (P.isGlobalInitialized(*GlobalIndex)) + return true; - return this->visitInitializer(Init) && this->emitFinishInit(E); + if (T) { + if (!this->visit(Init)) + return false; + return this->emitInitGlobal(*T, *GlobalIndex, E); } - return false; + return this->visitInitializer(Init) && this->emitFinishInit(E); } // Otherwise, use a local variable. diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index 6343b2af313f1..36eb7607e70bf 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -398,17 +398,11 @@ const llvm::fltSemantics &Context::getFloatSemantics(QualType T) const { } bool Context::Run(State &Parent, const Function *Func) { - - { - InterpState State(Parent, *P, Stk, *this, Func); - if (Interpret(State)) { - assert(Stk.empty()); - return true; - } - // State gets destroyed here, so the Stk.clear() below doesn't accidentally - // remove values the State's destructor might access. + InterpState State(Parent, *P, Stk, *this, Func); + if (Interpret(State)) { + assert(Stk.empty()); + return true; } - Stk.clear(); return false; } diff --git a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h index a6d90bb385067..fa98498dbe8fa 100644 --- a/clang/lib/AST/ByteCode/Context.h +++ b/clang/lib/AST/ByteCode/Context.h @@ -30,7 +30,7 @@ namespace interp { class Function; class Program; class State; -enum PrimType : unsigned; +enum PrimType : uint8_t; struct ParamOffset { unsigned Offset; diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h index 4a808c0a2d216..90dc2b4aa3111 100644 --- a/clang/lib/AST/ByteCode/Descriptor.h +++ b/clang/lib/AST/ByteCode/Descriptor.h @@ -24,7 +24,7 @@ class Record; class SourceInfo; struct InitMap; struct Descriptor; -enum PrimType : unsigned; +enum PrimType : uint8_t; using DeclTy = llvm::PointerUnion<const Decl *, const Expr *>; using InitMapPtr = std::optional<std::pair<bool, std::shared_ptr<InitMap>>>; diff --git a/clang/lib/AST/ByteCode/Function.h b/clang/lib/AST/ByteCode/Function.h index 92363b62c85d4..af429b7849e88 100644 --- a/clang/lib/AST/ByteCode/Function.h +++ b/clang/lib/AST/ByteCode/Function.h @@ -28,7 +28,7 @@ namespace interp { class Program; class ByteCodeEmitter; class Pointer; -enum PrimType : uint32_t; +enum PrimType : uint8_t; /// Describes a scope block. /// diff --git a/clang/lib/AST/ByteCode/InterpBlock.cpp b/clang/lib/AST/ByteCode/InterpBlock.cpp index 69221d85d6715..b7fd324594c82 100644 --- a/clang/lib/AST/ByteCode/InterpBlock.cpp +++ b/clang/lib/AST/ByteCode/InterpBlock.cpp @@ -18,10 +18,6 @@ using namespace clang::interp; void Block::addPointer(Pointer *P) { assert(P); - if (IsStatic) { - assert(!Pointers); - return; - } #ifndef NDEBUG assert(!hasPointer(P)); @@ -39,10 +35,6 @@ void Block::addPointer(Pointer *P) { void Block::removePointer(Pointer *P) { assert(P->isBlockPointer()); assert(P); - if (IsStatic) { - assert(!Pointers); - return; - } #ifndef NDEBUG assert(hasPointer(P)); @@ -74,10 +66,6 @@ void Block::replacePointer(Pointer *Old, Pointer *New) { assert(New); assert(New->isBlockPointer()); assert(Old != New); - if (IsStatic) { - assert(!Pointers); - return; - } #ifndef NDEBUG assert(hasPointer(Old)); #endif diff --git a/clang/lib/AST/ByteCode/InterpBlock.h b/clang/lib/AST/ByteCode/InterpBlock.h index 8f30a6ece74ee..778ac8fdb085c 100644 --- a/clang/lib/AST/ByteCode/InterpBlock.h +++ b/clang/lib/AST/ByteCode/InterpBlock.h @@ -22,7 +22,7 @@ class Block; class DeadBlock; class InterpState; class Pointer; -enum PrimType : unsigned; +enum PrimType : uint8_t; /// A memory block, either on the stack or in the heap. /// diff --git a/clang/lib/AST/ByteCode/InterpStack.cpp b/clang/lib/AST/ByteCode/InterpStack.cpp index 6b748d62b83bd..7920378f365f9 100644 --- a/clang/lib/AST/ByteCode/InterpStack.cpp +++ b/clang/lib/AST/ByteCode/InterpStack.cpp @@ -26,33 +26,33 @@ InterpStack::~InterpStack() { std::free(Chunk); Chunk = nullptr; StackSize = 0; -#ifndef NDEBUG ItemTypes.clear(); -#endif } // We keep the last chunk around to reuse. void InterpStack::clear() { - if (!Chunk) - return; - - if (Chunk->Next) - std::free(Chunk->Next); - - assert(Chunk); - StackSize = 0; -#ifndef NDEBUG - ItemTypes.clear(); -#endif + for (PrimType Item : llvm::reverse(ItemTypes)) { + TYPE_SWITCH(Item, { this->discard<T>(); }); + } + assert(ItemTypes.empty()); + assert(empty()); } void InterpStack::clearTo(size_t NewSize) { - assert(NewSize <= size()); - size_t ToShrink = size() - NewSize; - if (ToShrink == 0) + if (NewSize == 0) + return clear(); + if (NewSize == size()) return; - shrink(ToShrink); + assert(NewSize <= size()); + for (PrimType Item : llvm::reverse(ItemTypes)) { + TYPE_SWITCH(Item, { this->discard<T>(); }); + + if (size() == NewSize) + break; + } + + // Note: discard() above already removed the types from ItemTypes. assert(size() == NewSize); } @@ -105,25 +105,9 @@ void InterpStack::shrink(size_t Size) { Chunk->End -= Size; StackSize -= Size; - -#ifndef NDEBUG - size_t TypesSize = 0; - for (PrimType T : ItemTypes) - TYPE_SWITCH(T, { TypesSize += aligned_size<T>(); }); - - size_t StackSize = size(); - while (TypesSize > StackSize) { - TYPE_SWITCH(ItemTypes.back(), { - TypesSize -= aligned_size<T>(); - ItemTypes.pop_back(); - }); - } - assert(TypesSize == StackSize); -#endif } void InterpStack::dump() const { -#ifndef NDEBUG llvm::errs() << "Items: " << ItemTypes.size() << ". Size: " << size() << '\n'; if (ItemTypes.empty()) return; @@ -133,11 +117,11 @@ void InterpStack::dump() const { // The type of the item on the top of the stack is inserted to the back // of the vector, so the iteration has to happen backwards. - for (auto TyIt = ItemTypes.rbegin(); TyIt != ItemTypes.rend(); ++TyIt) { - Offset += align(primSize(*TyIt)); + for (PrimType Item : llvm::reverse(ItemTypes)) { + Offset += align(primSize(Item)); llvm::errs() << Index << '/' << Offset << ": "; - TYPE_SWITCH(*TyIt, { + TYPE_SWITCH(Item, { const T &V = peek<T>(Offset); llvm::errs() << V; }); @@ -145,5 +129,4 @@ void InterpStack::dump() const { ++Index; } -#endif } diff --git a/clang/lib/AST/ByteCode/InterpStack.h b/clang/lib/AST/ByteCode/InterpStack.h index 580494eb2347c..b0f9f6e225682 100644 --- a/clang/lib/AST/ByteCode/InterpStack.h +++ b/clang/lib/AST/ByteCode/InterpStack.h @@ -17,7 +17,6 @@ #include "IntegralAP.h" #include "MemberPointer.h" #include "PrimType.h" -#include <vector> namespace clang { namespace interp { @@ -33,18 +32,14 @@ class InterpStack final { /// Constructs a value in place on the top of the stack. template <typename T, typename... Tys> void push(Tys &&...Args) { new (grow(aligned_size<T>())) T(std::forward<Tys>(Args)...); -#ifndef NDEBUG ItemTypes.push_back(toPrimType<T>()); -#endif } /// Returns the value from the top of the stack and removes it. template <typename T> T pop() { -#ifndef NDEBUG assert(!ItemTypes.empty()); assert(ItemTypes.back() == toPrimType<T>()); ItemTypes.pop_back(); -#endif T *Ptr = &peekInternal<T>(); T Value = std::move(*Ptr); shrink(aligned_size<T>()); @@ -53,22 +48,20 @@ class InterpStack final { /// Discards the top value from the stack. template <typename T> void discard() { -#ifndef NDEBUG assert(!ItemTypes.empty()); assert(ItemTypes.back() == toPrimType<T>()); ItemTypes.pop_back(); -#endif T *Ptr = &peekInternal<T>(); - Ptr->~T(); + if constexpr (!std::is_trivially_destructible_v<T>) { + Ptr->~T(); + } shrink(aligned_size<T>()); } /// Returns a reference to the value on the top of the stack. template <typename T> T &peek() const { -#ifndef NDEBUG assert(!ItemTypes.empty()); assert(ItemTypes.back() == toPrimType<T>()); -#endif return peekInternal<T>(); } @@ -83,7 +76,7 @@ class InterpStack final { /// Returns the size of the stack in bytes. size_t size() const { return StackSize; } - /// Clears the stack without calling any destructors. + /// Clears the stack. void clear(); void clearTo(size_t NewSize); @@ -146,9 +139,11 @@ class InterpStack final { /// Total size of the stack. size_t StackSize = 0; -#ifndef NDEBUG - /// vector recording the type of data we pushed into the stack. - std::vector<PrimType> ItemTypes; + /// SmallVector recording the type of data we pushed into the stack. + /// We don't usually need this during normal code interpretation but + /// when aborting, we need type information to call the destructors + /// for what's left on the stack. + llvm::SmallVector<PrimType> ItemTypes; template <typename T> static constexpr PrimType toPrimType() { if constexpr (std::is_same_v<T, Pointer>) @@ -192,7 +187,6 @@ class InterpStack final { llvm_unreachable("unknown type push()'ed into InterpStack"); } -#endif }; } // namespace interp diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp index b5f0f9a44f344..f89967759ff9b 100644 --- a/clang/lib/AST/ByteCode/InterpState.cpp +++ b/clang/lib/AST/ByteCode/InterpState.cpp @@ -45,6 +45,12 @@ InterpState::~InterpState() { while (DeadBlocks) { DeadBlock *Next = DeadBlocks->Next; + + // There might be a pointer in a global structure pointing to the dead + // block. + for (Pointer *P = DeadBlocks->B.Pointers; P; P = P->asBlockPointer().Next) + DeadBlocks->B.removePointer(P); + std::free(DeadBlocks); DeadBlocks = Next; } @@ -53,12 +59,6 @@ InterpState::~InterpState() { void InterpState::cleanup() { // As a last resort, make sure all pointers still pointing to a dead block // don't point to it anymore. - for (DeadBlock *DB = DeadBlocks; DB; DB = DB->Next) { - for (Pointer *P = DB->B.Pointers; P; P = P->asBlockPointer().Next) { - P->PointeeStorage.BS.Pointee = nullptr; - } - } - Alloc.cleanup(); } diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index 1f6f1cbce5391..1dcdc0424801d 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -29,7 +29,7 @@ class DeadBlock; class Pointer; class Context; template <unsigned A, bool B> class Integral; -enum PrimType : unsigned; +enum PrimType : uint8_t; class Pointer; inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Pointer &P); diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h index 724da93ca1ef6..093084a8aad7b 100644 --- a/clang/lib/AST/ByteCode/PrimType.h +++ b/clang/lib/AST/ByteCode/PrimType.h @@ -31,7 +31,7 @@ template <bool Signed> class IntegralAP; template <unsigned Bits, bool Signed> class Integral; /// Enumeration of the primitive types of the VM. -enum PrimType : unsigned { +enum PrimType : uint8_t { PT_Sint8 = 0, PT_Uint8 = 1, PT_Sint16 = 2, @@ -51,14 +51,15 @@ enum PrimType : unsigned { // Like std::optional<PrimType>, but only sizeof(PrimType). class OptPrimType final { - unsigned V = ~0u; + static constexpr uint8_t None = 0xFF; + uint8_t V = None; public: OptPrimType() = default; OptPrimType(std::nullopt_t) {} OptPrimType(PrimType T) : V(static_cast<unsigned>(T)) {} - explicit constexpr operator bool() const { return V != ~0u; } + explicit constexpr operator bool() const { return V != None; } PrimType operator*() const { assert(operator bool()); return static_cast<PrimType>(V); diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h index b63a70ed8113a..9c4e63a14d448 100644 --- a/clang/lib/AST/ByteCode/Program.h +++ b/clang/lib/AST/ByteCode/Program.h @@ -73,6 +73,10 @@ class Program final { return Globals[Idx]->block(); } + bool isGlobalInitialized(unsigned Index) const { + return getPtrGlobal(Index).isInitialized(); + } + /// Finds a global's index. std::optional<unsigned> getGlobal(const ValueDecl *VD); std::optional<unsigned> getGlobal(const Expr *E); diff --git a/clang/test/AST/ByteCode/c.c b/clang/test/AST/ByteCode/c.c index a7b1fe07f6d84..654b3da2b7d66 100644 --- a/clang/test/AST/ByteCode/c.c +++ b/clang/test/AST/ByteCode/c.c @@ -329,3 +329,12 @@ void foo3 (void) void* x = 0; void* y = &*x; } + +static void *FooTable[1] = { + [0] = (void *[1]) { // 1 + [0] = (void *[1]) { // 2 + [0] = (void *[1]) {} // pedantic-warning {{use of an empty initializer}} + }, + } +}; + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits