> I'd be curious to see the same test in C, if someone(TM) had the time.
My C is a bit rusty, but I gave Claude a whirl and it generated something that
did the job perfectly. The difference was not quite as stark, but thread
queries were still ~50% slower.
I've attached the code Claude generated for the curious — run as
./notmuch-bench-something ~/.notmuch-db "date:2025"
> I agree twice as slow is not reasonable. I do expect it to be slower
> because of extra work that notmuch is doing (outside xapian) to
> construct the thread structures (parenting nodes turns out to be a bit
> non-trivial). I did also spend some effort tuning the thread:{}
> queries; the others I think have not looked at as closely.
This isn't even using thread:{}; it's just making sequential queries for each
individual thread (in the C version; the Python version concatenates all thread
IDs into a massive "thread: or thread: or thread:..." query).
At least for the C version, the performance gap narrows the more threads there
are, which makes sense given that each new thread causes a new query in this
very unoptimized version. But altogether, retrieving threads is substantially
slower than messages.
Cheers,
Lars
#include <notmuch.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Growable list of tag-name strings. */
typedef struct {
    char **tags;     /* heap array of string pointers */
    size_t count;    /* number of slots currently in use */
    size_t capacity; /* number of slots allocated in tags */
} tag_array_t;
/* Growable list of thread-ID strings. */
typedef struct {
    char **thread_ids; /* heap array of string pointers */
    size_t count;      /* number of slots currently in use */
    size_t capacity;   /* number of slots allocated in thread_ids */
} thread_id_array_t;
/*
 * Set up arr as an empty tag list with room for initial_capacity entries.
 *
 * A request for zero slots is rounded up to one, because growth is done by
 * doubling the capacity and doubling zero never makes room.
 *
 * Allocation failure is fatal: a message is written to stderr and the
 * process exits with EXIT_FAILURE. The function returns void, so there is
 * no other way to keep callers from writing through a NULL array.
 */
static void init_tag_array(tag_array_t *arr, size_t initial_capacity) {
    if (initial_capacity == 0) {
        initial_capacity = 1;
    }

    arr->tags = malloc(initial_capacity * sizeof *arr->tags);
    if (arr->tags == NULL) {
        fprintf(stderr, "Out of memory allocating tag array\n");
        exit(EXIT_FAILURE);
    }

    arr->count = 0;
    arr->capacity = initial_capacity;
}
/*
 * Set up arr as an empty thread-ID list with room for initial_capacity
 * entries.
 *
 * A request for zero slots is rounded up to one, because growth is done by
 * doubling the capacity and doubling zero never makes room.
 *
 * Allocation failure is fatal: a message is written to stderr and the
 * process exits with EXIT_FAILURE. The function returns void, so there is
 * no other way to keep callers from writing through a NULL array.
 */
static void init_thread_id_array(thread_id_array_t *arr, size_t initial_capacity) {
    if (initial_capacity == 0) {
        initial_capacity = 1;
    }

    arr->thread_ids = malloc(initial_capacity * sizeof *arr->thread_ids);
    if (arr->thread_ids == NULL) {
        fprintf(stderr, "Out of memory allocating thread ID array\n");
        exit(EXIT_FAILURE);
    }

    arr->count = 0;
    arr->capacity = initial_capacity;
}
/*
 * Append a private copy of tag to arr unless an equal string is already
 * stored. Membership is decided by a linear strcmp scan.
 *
 * A NULL tag is ignored. When the array is full its capacity is doubled
 * (a capacity of zero grows to one). The copy is made with strdup and is
 * owned by the array.
 *
 * Allocation failure is fatal: a message is written to stderr and the
 * process exits with EXIT_FAILURE, since the function has no return value
 * through which to report a dropped tag.
 */
static void add_unique_tag(tag_array_t *arr, const char *tag) {
    if (tag == NULL) {
        return;
    }

    for (size_t i = 0; i < arr->count; i++) {
        if (strcmp(arr->tags[i], tag) == 0) {
            return; /* already present */
        }
    }

    if (arr->count >= arr->capacity) {
        size_t grown_capacity = arr->capacity ? arr->capacity * 2 : 1;
        /* Keep the old pointer until realloc is known to have succeeded. */
        char **grown = realloc(arr->tags, grown_capacity * sizeof *grown);
        if (grown == NULL) {
            fprintf(stderr, "Out of memory growing tag array\n");
            exit(EXIT_FAILURE);
        }
        arr->tags = grown;
        arr->capacity = grown_capacity;
    }

    char *copy = strdup(tag);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory copying tag\n");
        exit(EXIT_FAILURE);
    }
    arr->tags[arr->count++] = copy;
}
/*
 * Append a private copy of thread_id to arr unless an equal string is
 * already stored. Membership is decided by a linear strcmp scan.
 *
 * A NULL thread_id is ignored. When the array is full its capacity is
 * doubled (a capacity of zero grows to one). The copy is made with strdup
 * and is owned by the array, so the caller's string may be released
 * afterwards.
 *
 * Allocation failure is fatal: a message is written to stderr and the
 * process exits with EXIT_FAILURE, since the function has no return value
 * through which to report a dropped ID.
 */
static void add_unique_thread_id(thread_id_array_t *arr, const char *thread_id) {
    if (thread_id == NULL) {
        return;
    }

    for (size_t i = 0; i < arr->count; i++) {
        if (strcmp(arr->thread_ids[i], thread_id) == 0) {
            return; /* already present */
        }
    }

    if (arr->count >= arr->capacity) {
        size_t grown_capacity = arr->capacity ? arr->capacity * 2 : 1;
        /* Keep the old pointer until realloc is known to have succeeded. */
        char **grown = realloc(arr->thread_ids, grown_capacity * sizeof *grown);
        if (grown == NULL) {
            fprintf(stderr, "Out of memory growing thread ID array\n");
            exit(EXIT_FAILURE);
        }
        arr->thread_ids = grown;
        arr->capacity = grown_capacity;
    }

    char *copy = strdup(thread_id);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory copying thread ID\n");
        exit(EXIT_FAILURE);
    }
    arr->thread_ids[arr->count++] = copy;
}
/*
 * Release every stored tag string and the pointer array itself, leaving
 * arr empty (tags == NULL, count == 0, capacity == 0). The tag_array_t
 * object itself is not freed.
 */
static void free_tag_array(tag_array_t *arr) {
    /* Pop entries from the back until none remain. */
    while (arr->count > 0) {
        arr->count--;
        free(arr->tags[arr->count]);
    }

    free(arr->tags);
    arr->tags = NULL;
    arr->capacity = 0;
}
/*
 * Release every stored thread-ID string and the pointer array itself,
 * leaving arr empty (thread_ids == NULL, count == 0, capacity == 0). The
 * thread_id_array_t object itself is not freed.
 */
static void free_thread_id_array(thread_id_array_t *arr) {
    /* Pop entries from the back until none remain. */
    while (arr->count > 0) {
        arr->count--;
        free(arr->thread_ids[arr->count]);
    }

    free(arr->thread_ids);
    arr->thread_ids = NULL;
    arr->capacity = 0;
}
/*
 * Add every tag carried by message to result, skipping tags that result
 * already holds (see add_unique_tag).
 *
 * NOTE(review): recent libnotmuch releases appear to return NULL from
 * notmuch_message_get_tags() on error; confirm against the installed
 * notmuch.h. The guard below makes such a message contribute no tags
 * instead of handing NULL to the iterator functions.
 */
static void process_message_tags(notmuch_message_t *message, tag_array_t *result) {
    notmuch_tags_t *tags = notmuch_message_get_tags(message);
    if (tags == NULL) {
        return;
    }

    while (notmuch_tags_valid(tags)) {
        add_unique_tag(result, notmuch_tags_get(tags));
        notmuch_tags_move_to_next(tags);
    }

    notmuch_tags_destroy(tags);
}
/*
 * Collect the distinct tags found on the messages matching query_string
 * and on all messages of the threads those matches belong to.
 *
 * The database at db_path is opened read-only. The work is done in two
 * passes:
 *   1. run query_string as a message search, recording each match's
 *      thread ID and tags;
 *   2. for every distinct thread ID, run a separate "thread:<id>" message
 *      search and record the tags of every message it returns.
 *
 * Returns a heap-allocated tag_array_t that the caller releases with
 * free_tag_array() followed by free(). Returns NULL if memory cannot be
 * allocated, the database cannot be opened, or the initial query cannot be
 * created or run; everything acquired up to that point is released first.
 * A per-thread query that cannot be created or run is reported on stderr
 * and skipped, so the remaining threads are still processed.
 *
 * NOTE(review): notmuch_database_open() is believed to be deprecated in
 * newer libnotmuch in favour of notmuch_database_open_with_config();
 * confirm before switching.
 */
tag_array_t *get_unique_tags(const char *db_path, const char *query_string) {
    notmuch_database_t *db = NULL;
    notmuch_query_t *query = NULL;
    notmuch_messages_t *messages = NULL;
    notmuch_status_t status;
    thread_id_array_t thread_ids;

    tag_array_t *result = malloc(sizeof *result);
    if (result == NULL) {
        return NULL;
    }
    init_tag_array(result, 16);
    init_thread_id_array(&thread_ids, 16);

    // Open the database
    status = notmuch_database_open(db_path,
                                   NOTMUCH_DATABASE_MODE_READ_ONLY,
                                   &db);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        fprintf(stderr, "Failed to open database\n");
        db = NULL; /* do not trust the out-parameter after a failed open */
        goto fail;
    }

    // First pass: messages matching the search criteria
    query = notmuch_query_create(db, query_string);
    if (query == NULL) {
        goto fail;
    }

    status = notmuch_query_search_messages(query, &messages);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        goto fail;
    }

    for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
        notmuch_message_t *message = notmuch_messages_get(messages);
        const char *thread_id = notmuch_message_get_thread_id(message);

        /* The ID string is only borrowed from the message, so it is copied
         * into thread_ids before the message is destroyed below. */
        if (thread_id != NULL) {
            add_unique_thread_id(&thread_ids, thread_id);
        }
        process_message_tags(message, result);
        notmuch_message_destroy(message);
    }
    notmuch_messages_destroy(messages);
    notmuch_query_destroy(query);
    query = NULL; /* keeps the fail path from destroying it twice */

    // Second pass: one query per collected thread ID
    for (size_t i = 0; i < thread_ids.count; i++) {
        const char *id = thread_ids.thread_ids[i];
        size_t query_size = strlen("thread:") + strlen(id) + 1;
        char *thread_query = malloc(query_size);
        if (thread_query == NULL) {
            fprintf(stderr, "Out of memory building thread query\n");
            goto fail;
        }
        snprintf(thread_query, query_size, "thread:%s", id);

        notmuch_query_t *per_thread = notmuch_query_create(db, thread_query);
        if (per_thread == NULL) {
            fprintf(stderr, "Failed to create query for thread %s\n", id);
        } else {
            notmuch_messages_t *thread_messages = NULL;

            status = notmuch_query_search_messages(per_thread, &thread_messages);
            if (status == NOTMUCH_STATUS_SUCCESS) {
                for (; notmuch_messages_valid(thread_messages);
                     notmuch_messages_move_to_next(thread_messages)) {
                    notmuch_message_t *message = notmuch_messages_get(thread_messages);
                    process_message_tags(message, result);
                    notmuch_message_destroy(message);
                }
                notmuch_messages_destroy(thread_messages);
            } else {
                fprintf(stderr, "Failed to search thread %s\n", id);
            }
            notmuch_query_destroy(per_thread);
        }
        free(thread_query);
    }

    // Cleanup
    free_thread_id_array(&thread_ids);
    notmuch_database_destroy(db);
    return result;

fail:
    /* Release whatever was acquired before the failure, newest first. */
    if (query != NULL) {
        notmuch_query_destroy(query);
    }
    if (db != NULL) {
        notmuch_database_destroy(db);
    }
    free_thread_id_array(&thread_ids);
    free_tag_array(result);
    free(result);
    return NULL;
}
/*
 * Command-line driver: <notmuch-db-path> <query>.
 *
 * Prints the number of unique tags found and exits with 0, or prints a
 * diagnostic to stderr and exits with 1 on bad usage or lookup failure.
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <notmuch-db-path> <query>\n", argv[0]);
        return 1;
    }

    const char *db_path = argv[1];
    const char *query_text = argv[2];

    tag_array_t *unique = get_unique_tags(db_path, query_text);
    if (unique == NULL) {
        fputs("Failed to get tags\n", stderr);
        return 1;
    }

    /*
     * Only the count is reported; listing each tag is left disabled
     * (presumably to keep benchmark runs quiet). To list them:
     *     for (size_t i = 0; i < unique->count; i++)
     *         printf("%s\n", unique->tags[i]);
     */
    printf("Found %zu unique tags:\n", unique->count);

    free_tag_array(unique);
    free(unique);
    return 0;
}
#include <notmuch.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Resizable collection of tag-name strings. */
typedef struct {
    char **tags;     /* heap array of string pointers */
    size_t count;    /* entries currently stored */
    size_t capacity; /* entries the tags array has room for */
} tag_array_t;
/*
 * Set up arr as an empty tag list with room for initial_capacity entries.
 *
 * A request for zero slots is rounded up to one, because growth is done by
 * doubling the capacity and doubling zero never makes room.
 *
 * Allocation failure is fatal: a message is written to stderr and the
 * process exits with EXIT_FAILURE. The function returns void, so there is
 * no other way to keep callers from writing through a NULL array.
 */
static void init_tag_array(tag_array_t *arr, size_t initial_capacity) {
    size_t slots = initial_capacity > 0 ? initial_capacity : 1;

    arr->tags = malloc(slots * sizeof *arr->tags);
    if (arr->tags == NULL) {
        fprintf(stderr, "Out of memory allocating tag array\n");
        exit(EXIT_FAILURE);
    }

    arr->count = 0;
    arr->capacity = slots;
}
/*
 * Append a private copy of tag to arr unless an equal string is already
 * stored. Membership is decided by a linear strcmp scan.
 *
 * A NULL tag is ignored. When the array is full its capacity is doubled
 * (a capacity of zero grows to one). The copy is made with strdup and is
 * owned by the array.
 *
 * Allocation failure is fatal: a message is written to stderr and the
 * process exits with EXIT_FAILURE, since the function has no return value
 * through which to report a dropped tag.
 */
static void add_unique_tag(tag_array_t *arr, const char *tag) {
    if (tag == NULL) {
        return;
    }

    for (size_t i = 0; i < arr->count; i++) {
        if (strcmp(arr->tags[i], tag) == 0) {
            return; /* already present */
        }
    }

    if (arr->count >= arr->capacity) {
        size_t grown_capacity = arr->capacity ? arr->capacity * 2 : 1;
        /* Keep the old pointer until realloc is known to have succeeded. */
        char **grown = realloc(arr->tags, grown_capacity * sizeof *grown);
        if (grown == NULL) {
            fprintf(stderr, "Out of memory growing tag array\n");
            exit(EXIT_FAILURE);
        }
        arr->tags = grown;
        arr->capacity = grown_capacity;
    }

    char *copy = strdup(tag);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory copying tag\n");
        exit(EXIT_FAILURE);
    }
    arr->tags[arr->count++] = copy;
}
/*
 * Free each stored tag string, then the pointer array, and reset arr to
 * the empty state (tags == NULL, count == 0, capacity == 0). The
 * tag_array_t object itself is left for the caller to free.
 */
static void free_tag_array(tag_array_t *arr) {
    char **slot = arr->tags;

    /* Walk the used slots with a pointer instead of an index. */
    for (size_t remaining = arr->count; remaining > 0; remaining--) {
        free(*slot);
        slot++;
    }

    free(arr->tags);
    arr->tags = NULL;
    arr->count = 0;
    arr->capacity = 0;
}
/*
 * Collect the distinct tags found on every message of every thread that
 * matches query_string.
 *
 * The database at db_path is opened read-only and query_string is run as a
 * thread search; each returned thread's messages are walked and their tags
 * added to the result.
 *
 * Returns a heap-allocated tag_array_t that the caller releases with
 * free_tag_array() followed by free(). Returns NULL if memory cannot be
 * allocated, the database cannot be opened, or the query cannot be created
 * or run; everything acquired up to that point, including the tag storage
 * set up by init_tag_array(), is released first.
 */
tag_array_t *get_unique_tags(const char *db_path, const char *query_string) {
    notmuch_database_t *db = NULL;
    notmuch_query_t *query = NULL;
    notmuch_threads_t *threads = NULL;
    notmuch_status_t status;

    tag_array_t *result = malloc(sizeof *result);
    if (result == NULL) {
        return NULL;
    }
    init_tag_array(result, 16); // Start with space for 16 tags

    // Open the database
    status = notmuch_database_open(db_path,
                                   NOTMUCH_DATABASE_MODE_READ_ONLY,
                                   &db);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        fprintf(stderr, "Failed to open database\n");
        db = NULL; /* do not trust the out-parameter after a failed open */
        goto fail;
    }

    // Create and execute query
    query = notmuch_query_create(db, query_string);
    if (query == NULL) {
        goto fail;
    }

    status = notmuch_query_search_threads(query, &threads);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        goto fail;
    }

    for (; notmuch_threads_valid(threads); notmuch_threads_move_to_next(threads)) {
        notmuch_thread_t *thread = notmuch_threads_get(threads);
        if (thread == NULL) {
            /* NOTE(review): believed to happen only on out-of-memory;
             * confirm against notmuch.h. Skip rather than dereference. */
            continue;
        }

        notmuch_messages_t *messages = notmuch_thread_get_messages(thread);
        for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
            notmuch_message_t *message = notmuch_messages_get(messages);
            notmuch_tags_t *tags = notmuch_message_get_tags(message);

            /* NOTE(review): recent libnotmuch appears to return NULL here
             * on error; such a message simply contributes no tags. */
            if (tags != NULL) {
                for (; notmuch_tags_valid(tags); notmuch_tags_move_to_next(tags)) {
                    add_unique_tag(result, notmuch_tags_get(tags));
                }
                notmuch_tags_destroy(tags);
            }
            notmuch_message_destroy(message);
        }
        notmuch_messages_destroy(messages);
        notmuch_thread_destroy(thread);
    }

    // Cleanup
    notmuch_threads_destroy(threads);
    notmuch_query_destroy(query);
    notmuch_database_destroy(db);
    return result;

fail:
    /* Release whatever was acquired before the failure, newest first. */
    if (query != NULL) {
        notmuch_query_destroy(query);
    }
    if (db != NULL) {
        notmuch_database_destroy(db);
    }
    free_tag_array(result);
    free(result);
    return NULL;
}
/*
 * Command-line driver: <notmuch-db-path> <query>.
 *
 * Prints the number of unique tags found and exits with 0, or prints a
 * diagnostic to stderr and exits with 1 on bad usage or lookup failure.
 */
int main(int argc, char **argv) {
    int exit_code = 1;

    if (argc == 3) {
        tag_array_t *found = get_unique_tags(argv[1], argv[2]);

        if (found != NULL) {
            /*
             * Only the count is reported; listing each tag is left
             * disabled (presumably to keep benchmark runs quiet). To list:
             *     for (size_t i = 0; i < found->count; i++)
             *         printf("%s\n", found->tags[i]);
             */
            printf("Found %zu unique tags:\n", found->count);
            free_tag_array(found);
            free(found);
            exit_code = 0;
        } else {
            fputs("Failed to get tags\n", stderr);
        }
    } else {
        fprintf(stderr, "Usage: %s <notmuch-db-path> <query>\n", argv[0]);
    }

    return exit_code;
}
_______________________________________________
notmuch mailing list -- [email protected]
To unsubscribe send an email to [email protected]