https://github.com/python/cpython/commit/81ef1b73175378543534ebe1f148c22deb8239b3
commit: 81ef1b73175378543534ebe1f148c22deb8239b3
branch: main
author: Hai Zhu <[email protected]>
committer: markshannon <[email protected]>
date: 2026-03-16T15:58:18Z
summary:

 gh-144888: Replace bloom filter linked lists with continuous arrays to 
optimize executor invalidating performance (GH-145873)

files:
M Include/internal/pycore_interp_structs.h
M Include/internal/pycore_optimizer.h
M InternalDocs/jit.md
M Modules/_testinternalcapi.c
M Objects/funcobject.c
M Python/jit.c
M Python/optimizer.c
M Python/pylifecycle.c
M Python/pystate.c

diff --git a/Include/internal/pycore_interp_structs.h 
b/Include/internal/pycore_interp_structs.h
index 776fb9575c2365..e2008f8303e21b 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -14,6 +14,7 @@ extern "C" {
 #include "pycore_structs.h"       // PyHamtObject
 #include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_typedefs.h"      // _PyRuntimeState
+#include "pycore_uop.h"           // _PyBloomFilter
 
 #define CODE_MAX_WATCHERS 8
 #define CONTEXT_MAX_WATCHERS 8
@@ -972,7 +973,10 @@ struct _is {
 
     // Optimization configuration (thresholds and flags for JIT and 
interpreter)
     _PyOptimizationConfig opt_config;
-    struct _PyExecutorObject *executor_list_head;
+    _PyBloomFilter *executor_blooms;             // Contiguous bloom filter 
array
+    struct _PyExecutorObject **executor_ptrs;    // Corresponding executor 
pointer array
+    size_t executor_count;                       // Number of valid executors
+    size_t executor_capacity;                    // Array capacity
     struct _PyExecutorObject *executor_deletion_list_head;
     struct _PyExecutorObject *cold_executor;
     struct _PyExecutorObject *cold_dynamic_executor;
diff --git a/Include/internal/pycore_optimizer.h 
b/Include/internal/pycore_optimizer.h
index c63f0167a0f64a..cea9fcce237301 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -128,8 +128,8 @@ typedef struct {
     bool cold;
     uint8_t pending_deletion;
     int32_t index;           // Index of ENTER_EXECUTOR (if code isn't NULL, 
below).
-    _PyBloomFilter bloom;
-    _PyExecutorLinkListNode links;
+    int32_t bloom_array_idx;        // Index in 
interp->executor_blooms/executor_ptrs.
+    _PyExecutorLinkListNode links;  // Used by deletion list.
     PyCodeObject *code;  // Weak (NULL if no corresponding ENTER_EXECUTOR).
 } _PyVMData;
 
@@ -157,7 +157,7 @@ typedef struct _PyExecutorObject {
 // Export for '_opcode' shared extension (JIT compiler).
 PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
 
-void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
+int _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
 void _Py_ExecutorDetach(_PyExecutorObject *);
 void _Py_BloomFilter_Init(_PyBloomFilter *);
 void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md
index 1740b22b85f77b..decfccad2d8d37 100644
--- a/InternalDocs/jit.md
+++ b/InternalDocs/jit.md
@@ -78,8 +78,8 @@ and execution returns to the adaptive interpreter.
 ## Invalidating Executors
 
 In addition to being stored on the code object, each executor is also
-inserted into a list of all executors, which is stored in the interpreter
-state's `executor_list_head` field. This list is used when it is necessary
+inserted into contiguous arrays (`executor_blooms` and `executor_ptrs`)
+stored in the interpreter state. These arrays are used when it is necessary
 to invalidate executors because values they used in their construction may
 have changed.
 
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index aa5911ef2fb449..8c316b7c8ddda0 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -278,10 +278,8 @@ get_jit_code_ranges(PyObject *self, PyObject 
*Py_UNUSED(args))
     if (interp == NULL) {
         return ranges;
     }
-    for (_PyExecutorObject *exec = interp->executor_list_head;
-         exec != NULL;
-         exec = exec->vm_data.links.next)
-    {
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        _PyExecutorObject *exec = interp->executor_ptrs[i];
         if (exec->jit_code == NULL || exec->jit_size == 0) {
             continue;
         }
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index fc32826fb3a861..a7a3d9c78ef3d0 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -12,7 +12,6 @@
 #include "pycore_setobject.h"     // _PySet_NextEntry()
 #include "pycore_stats.h"
 #include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
-#include "pycore_optimizer.h"     // _Py_Executors_InvalidateDependency
 
 static const char *
 func_event_name(PyFunction_WatchEvent event) {
diff --git a/Python/jit.c b/Python/jit.c
index 3e0a0aa8bfcc81..4990c743224d3c 100644
--- a/Python/jit.c
+++ b/Python/jit.c
@@ -62,6 +62,23 @@ jit_error(const char *message)
 
 static size_t _Py_jit_shim_size = 0;
 
+static int
+address_in_executor_array(_PyExecutorObject **ptrs, size_t count, uintptr_t 
addr)
+{
+    for (size_t i = 0; i < count; i++) {
+        _PyExecutorObject *exec = ptrs[i];
+        if (exec->jit_code == NULL || exec->jit_size == 0) {
+            continue;
+        }
+        uintptr_t start = (uintptr_t)exec->jit_code;
+        uintptr_t end = start + exec->jit_size;
+        if (addr >= start && addr < end) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
 static int
 address_in_executor_list(_PyExecutorObject *head, uintptr_t addr)
 {
@@ -94,7 +111,7 @@ _PyJIT_AddressInJitCode(PyInterpreterState *interp, 
uintptr_t addr)
             return 1;
         }
     }
-    if (address_in_executor_list(interp->executor_list_head, addr)) {
+    if (address_in_executor_array(interp->executor_ptrs, 
interp->executor_count, addr)) {
         return 1;
     }
     if (address_in_executor_list(interp->executor_deletion_list_head, addr)) {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 7315bb6b9f603d..a9e788d0dcb1d5 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1379,7 +1379,10 @@ make_executor_from_uops(_PyThreadStateImpl *tstate, 
_PyUOpInstruction *buffer, i
     // linking of executor. Otherwise, the GC tries to untrack a
     // still untracked object during dealloc.
     _PyObject_GC_TRACK(executor);
-    _Py_ExecutorInit(executor, dependencies);
+    if (_Py_ExecutorInit(executor, dependencies) < 0) {
+        Py_DECREF(executor);
+        return NULL;
+    }
 #ifdef Py_DEBUG
     char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
     int lltrace = 0;
@@ -1646,59 +1649,63 @@ bloom_filter_may_contain(_PyBloomFilter *bloom, 
_PyBloomFilter *hashes)
     return true;
 }
 
-static void
-link_executor(_PyExecutorObject *executor)
+static int
+link_executor(_PyExecutorObject *executor, const _PyBloomFilter *bloom)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    _PyExecutorLinkListNode *links = &executor->vm_data.links;
-    _PyExecutorObject *head = interp->executor_list_head;
-    if (head == NULL) {
-        interp->executor_list_head = executor;
-        links->previous = NULL;
-        links->next = NULL;
-    }
-    else {
-        assert(head->vm_data.links.previous == NULL);
-        links->previous = NULL;
-        links->next = head;
-        head->vm_data.links.previous = executor;
-        interp->executor_list_head = executor;
-    }
-    /* executor_list_head must be first in list */
-    assert(interp->executor_list_head->vm_data.links.previous == NULL);
+    if (interp->executor_count == interp->executor_capacity) {
+        size_t new_cap = interp->executor_capacity ? interp->executor_capacity 
* 2 : 64;
+        _PyBloomFilter *new_blooms = PyMem_Realloc(
+            interp->executor_blooms, new_cap * sizeof(_PyBloomFilter));
+        if (new_blooms == NULL) {
+            return -1;
+        }
+        _PyExecutorObject **new_ptrs = PyMem_Realloc(
+            interp->executor_ptrs, new_cap * sizeof(_PyExecutorObject *));
+        if (new_ptrs == NULL) {
+            /* Revert blooms realloc — the old pointer may have been freed by
+             * a successful realloc, but new_blooms is the valid pointer. */
+            interp->executor_blooms = new_blooms;
+            return -1;
+        }
+        interp->executor_blooms = new_blooms;
+        interp->executor_ptrs = new_ptrs;
+        interp->executor_capacity = new_cap;
+    }
+    size_t idx = interp->executor_count++;
+    interp->executor_blooms[idx] = *bloom;
+    interp->executor_ptrs[idx] = executor;
+    executor->vm_data.bloom_array_idx = (int32_t)idx;
+    return 0;
 }
 
 static void
 unlink_executor(_PyExecutorObject *executor)
 {
-    _PyExecutorLinkListNode *links = &executor->vm_data.links;
-    _PyExecutorObject *next = links->next;
-    _PyExecutorObject *prev = links->previous;
-    if (next != NULL) {
-        next->vm_data.links.previous = prev;
-    }
-    if (prev != NULL) {
-        prev->vm_data.links.next = next;
-    }
-    else {
-        // prev == NULL implies that executor is the list head
-        PyInterpreterState *interp = PyInterpreterState_Get();
-        assert(interp->executor_list_head == executor);
-        interp->executor_list_head = next;
+    PyInterpreterState *interp = PyInterpreterState_Get();
+    int32_t idx = executor->vm_data.bloom_array_idx;
+    assert(idx >= 0 && (size_t)idx < interp->executor_count);
+    size_t last = --interp->executor_count;
+    if ((size_t)idx != last) {
+        /* Swap-remove: move the last element into the vacated slot */
+        interp->executor_blooms[idx] = interp->executor_blooms[last];
+        interp->executor_ptrs[idx] = interp->executor_ptrs[last];
+        interp->executor_ptrs[idx]->vm_data.bloom_array_idx = idx;
     }
+    executor->vm_data.bloom_array_idx = -1;
 }
 
 /* This must be called by optimizers before using the executor */
-void
+int
 _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter 
*dependency_set)
 {
     executor->vm_data.valid = true;
     executor->vm_data.pending_deletion = 0;
     executor->vm_data.code = NULL;
-    for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
-        executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
+    if (link_executor(executor, dependency_set) < 0) {
+        return -1;
     }
-    link_executor(executor);
+    return 0;
 }
 
 static _PyExecutorObject *
@@ -1809,11 +1816,15 @@ void
 _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
 {
     assert(executor->vm_data.valid);
-    _Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    int32_t idx = executor->vm_data.bloom_array_idx;
+    assert(idx >= 0 && (size_t)idx < interp->executor_count);
+    _Py_BloomFilter_Add(&interp->executor_blooms[idx], obj);
 }
 
 /* Invalidate all executors that depend on `obj`
- * May cause other executors to be invalidated as well
+ * May cause other executors to be invalidated as well.
+ * Uses contiguous bloom filter array for cache-friendly scanning.
  */
 void
 _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int 
is_invalidation)
@@ -1821,23 +1832,20 @@ _Py_Executors_InvalidateDependency(PyInterpreterState 
*interp, void *obj, int is
     _PyBloomFilter obj_filter;
     _Py_BloomFilter_Init(&obj_filter);
     _Py_BloomFilter_Add(&obj_filter, obj);
-    /* Walk the list of executors */
-    /* TO DO -- Use a tree to avoid traversing as many objects */
+    /* Scan contiguous bloom filter array */
     PyObject *invalidate = PyList_New(0);
     if (invalidate == NULL) {
         goto error;
     }
     /* Clearing an executor can clear others, so we need to make a list of
      * executors to invalidate first */
-    for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
-        assert(exec->vm_data.valid);
-        _PyExecutorObject *next = exec->vm_data.links.next;
-        if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter) &&
-            PyList_Append(invalidate, (PyObject *)exec))
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        assert(interp->executor_ptrs[i]->vm_data.valid);
+        if (bloom_filter_may_contain(&interp->executor_blooms[i], &obj_filter) 
&&
+            PyList_Append(invalidate, (PyObject *)interp->executor_ptrs[i]))
         {
             goto error;
         }
-        exec = next;
     }
     for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
         PyObject *exec = PyList_GET_ITEM(invalidate, i);
@@ -1859,8 +1867,9 @@ _Py_Executors_InvalidateDependency(PyInterpreterState 
*interp, void *obj, int is
 void
 _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
 {
-    while (interp->executor_list_head) {
-        _PyExecutorObject *executor = interp->executor_list_head;
+    while (interp->executor_count > 0) {
+        /* Invalidate from the end to avoid repeated swap-remove shifts */
+        _PyExecutorObject *executor = 
interp->executor_ptrs[interp->executor_count - 1];
         assert(executor->vm_data.valid);
         if (executor->vm_data.code) {
             // Clear the entire code object so its co_executors array be freed:
@@ -1878,8 +1887,7 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, 
int is_invalidation)
 void
 _Py_Executors_InvalidateCold(PyInterpreterState *interp)
 {
-    /* Walk the list of executors */
-    /* TO DO -- Use a tree to avoid traversing as many objects */
+    /* Scan contiguous executor array */
     PyObject *invalidate = PyList_New(0);
     if (invalidate == NULL) {
         goto error;
@@ -1887,9 +1895,9 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
 
     /* Clearing an executor can deallocate others, so we need to make a list of
      * executors to invalidate first */
-    for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        _PyExecutorObject *exec = interp->executor_ptrs[i];
         assert(exec->vm_data.valid);
-        _PyExecutorObject *next = exec->vm_data.links.next;
 
         if (exec->vm_data.cold && PyList_Append(invalidate, (PyObject *)exec) 
< 0) {
             goto error;
@@ -1897,8 +1905,6 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
         else {
             exec->vm_data.cold = true;
         }
-
-        exec = next;
     }
     for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
         PyObject *exec = PyList_GET_ITEM(invalidate, i);
@@ -2142,9 +2148,8 @@ _PyDumpExecutors(FILE *out)
     fprintf(out, "    rankdir = \"LR\"\n\n");
     fprintf(out, "    node [colorscheme=greys9]\n");
     PyInterpreterState *interp = PyInterpreterState_Get();
-    for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
-        executor_to_gv(exec, out);
-        exec = exec->vm_data.links.next;
+    for (size_t i = 0; i < interp->executor_count; i++) {
+        executor_to_gv(interp->executor_ptrs[i], out);
     }
     fprintf(out, "}\n\n");
     return 0;
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 2b8e9a02cb6184..68052a91d6a1f1 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1761,6 +1761,12 @@ finalize_modules(PyThreadState *tstate)
     interp->compiling = false;
 #ifdef _Py_TIER2
     _Py_Executors_InvalidateAll(interp, 0);
+    PyMem_Free(interp->executor_blooms);
+    PyMem_Free(interp->executor_ptrs);
+    interp->executor_blooms = NULL;
+    interp->executor_ptrs = NULL;
+    interp->executor_count = 0;
+    interp->executor_capacity = 0;
 #endif
 
     // Stop watching __builtin__ modifications
diff --git a/Python/pystate.c b/Python/pystate.c
index 17b8430b19c188..fcb73b4dcefe1d 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -597,7 +597,10 @@ init_interpreter(PyInterpreterState *interp,
     interp->_code_object_generation = 0;
     interp->jit = false;
     interp->compiling = false;
-    interp->executor_list_head = NULL;
+    interp->executor_blooms = NULL;
+    interp->executor_ptrs = NULL;
+    interp->executor_count = 0;
+    interp->executor_capacity = 0;
     interp->executor_deletion_list_head = NULL;
     interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to