On Wed, Oct 16, 2024 at 12:50:01AM +0200, Patrice Dumas wrote: > On Tue, Oct 15, 2024 at 12:31:07PM +0100, Gavin Smith wrote: > > On Mon, Oct 14, 2024 at 11:16:58PM +0200, Patrice Dumas wrote: > > > This looks good to me, it is quite simple. Maybe the number of buckets > > > could be set based on the number of sections + nodes + index entries? > > > > Great idea, I was concerned that I'd hardcoded the number of buckets. > > Could add the number of footnotes * 2 and the number of special units > (including special units not associated to a section/node). And > sections number should be * 3 for contents and shortcontents targets. > > Also maybe the buckets memory should be allocated to that same number, > (number of sections * 3 + number of headings + number of (nodes, > anchors, floats) + number of index entries + number of footnotes * 2 > + number of special units) as we know in advance there there will be > this number of registered ids.
I've started the code for this, but as I am not familiar with the XS conversion code at all, it would take me some time to work out how to get all of this information. I tried copying code from elsewhere to access self->document in html_converter_customize in html_prepare_converter.c, but this wasn't set yet. diff --git a/tp/Texinfo/XS/Makefile.am b/tp/Texinfo/XS/Makefile.am index 101038877b..9a9fcc622e 100644 --- a/tp/Texinfo/XS/Makefile.am +++ b/tp/Texinfo/XS/Makefile.am @@ -404,6 +404,8 @@ C_libtexinfo_convert_sources = \ convert/convert_html.c \ convert/format_html.h \ convert/format_html.c \ + convert/hashmap.c \ + convert/hashmap.h \ convert/html_converter_types.h \ convert/html_converter_init_options.c \ convert/html_converter_finish.c \ diff --git a/tp/Texinfo/XS/convert/converter.c b/tp/Texinfo/XS/convert/converter.c index e5d38c25ea..a6dc92b158 100644 --- a/tp/Texinfo/XS/convert/converter.c +++ b/tp/Texinfo/XS/convert/converter.c @@ -283,6 +283,8 @@ new_converter (enum converter_format format, unsigned long flags) /* set low level data representations options */ if (flags & CONVF_string_list) converter->ids_data_type = IDT_string_list; + else if (flags & CONVF_hashmap) + converter->ids_data_type = IDT_hashmap; #ifdef HAVE_CXX_HASHMAP else if (flags & CONVF_cxx_hashmap) converter->ids_data_type = IDT_cxx_hashmap; @@ -454,10 +456,8 @@ converter_converter (enum converter_format format, CONVERTER_INITIALIZATION_INFO *format_defaults; unsigned long flags; - /* NOTE if HAVE_CXX_HASHMAP is not set, even with CONVF_cxx_hashmap - string lists will be used */ if (!converter_flags) - flags = CONVF_cxx_hashmap; + flags = CONVF_hashmap; /* To use a string list. Slower. flags = CONVF_string_list; diff --git a/tp/Texinfo/XS/convert/get_converter_perl_info.c b/tp/Texinfo/XS/convert/get_converter_perl_info.c index 364bcb2af2..54f6ddc537 100644 --- a/tp/Texinfo/XS/convert/get_converter_perl_info.c +++ b/tp/Texinfo/XS/convert/get_converter_perl_info.c @@ -105,7 +105,7 @@ get_or_create_sv_converter (SV *converter_in, const char *input_class) } converter_descriptor = new_converter (converter_format, - CONVF_perl_hashmap); + CONVF_hashmap); /* CONVF_string_list); */ diff --git a/tp/Texinfo/XS/convert/hashmap.c b/tp/Texinfo/XS/convert/hashmap.c new file mode 100644 index 0000000000..fa19f3cbed --- /dev/null +++ b/tp/Texinfo/XS/convert/hashmap.c @@ -0,0 +1,170 @@ +/* Copyright 2024 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "converter_types.h" + +#include "hashmap.h" + +typedef struct BUCKET { + /* Linked list of strings. */ + char *string; + struct BUCKET *next; +} BUCKET; + +/* Allocator for bucket object. */ +#define BUCKETS_PER_ARENA 64 + +typedef struct BUCKET_ARENA { + BUCKET buckets[BUCKETS_PER_ARENA]; + int used; + struct BUCKET_ARENA *next; +} BUCKET_ARENA; + +typedef struct C_HASHMAP { + BUCKET **bucket; + int num_buckets; + long count; + + BUCKET_ARENA *arena; +} C_HASHMAP; + +static BUCKET * +new_bucket (C_HASHMAP *H) +{ + if (H->arena->used < BUCKETS_PER_ARENA) + return &H->arena->buckets[H->arena->used++]; + + BUCKET_ARENA *new_arena = malloc (sizeof (BUCKET_ARENA)); + memset (new_arena, 0, sizeof (BUCKET_ARENA)); + + /* Add to front of list. */ + new_arena->next = H->arena; + H->arena = new_arena; + + return &H->arena->buckets[H->arena->used++]; +} + + +static unsigned long +hash_string (const char *string, C_HASHMAP *H) +{ + unsigned int hash = 0; + + char c; + const char *pc = string; + + while ((c = *pc)) + { + hash *= 127; /* prime */ + hash += c; + pc++; + } + + hash %= H->num_buckets; + return hash; +} + +#define NBUCKETS 256 +void +init_registered_ids_c_hashmap (CONVERTER *self, int nbuckets) +{ + C_HASHMAP *H = malloc (sizeof (C_HASHMAP)); + memset (H, 0, sizeof (C_HASHMAP)); + + H->arena = malloc (sizeof (BUCKET_ARENA)); + memset (H->arena, 0, sizeof (BUCKET_ARENA)); + + if (nbuckets == 0) + nbuckets = NBUCKETS; + + H->num_buckets = nbuckets; + H->bucket = malloc (sizeof (BUCKET *) * nbuckets); + memset (H->bucket, 0, sizeof (BUCKET *) * nbuckets); + + self->registered_ids_c_hashmap = H; +} + +int +is_c_hashmap_registered_id (CONVERTER *self, const char *in_string) +{ + C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap; + unsigned int hash = hash_string(in_string, H); + BUCKET *B = H->bucket[hash]; + + while (B) + { + if (!strcmp(B->string, in_string)) + return 1; + B = B->next; + } + + return 0; +} + +void +c_hashmap_register_id (CONVERTER *self, const char *in_string) +{ + C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap; + + BUCKET *new = new_bucket(H); + new->string = strdup (in_string); + unsigned int hash = hash_string(in_string, H); + + /* Add to front of linked list. */ + new->next = H->bucket[hash]; + H->bucket[hash] = new; + + H->count++; +} + +void +clear_registered_ids_c_hashmap (CONVERTER *self) +{ + C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap; + int i; + + BUCKET_ARENA *arena, *next; + /* Free chain. */ + next = H->arena; + while (next) + { + arena = next; + next = arena->next; + + for (i = 0; i < arena->used; i++) + { + free (arena->buckets[i].string); + } + free (arena); + } + + free (H->bucket); + memset (H, 0, sizeof (C_HASHMAP)); +} + +void +free_registered_ids_c_hashmap (CONVERTER *self) +{ + C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap; + clear_registered_ids_c_hashmap (self); + free (H); +} + diff --git a/tp/Texinfo/XS/convert/hashmap.h b/tp/Texinfo/XS/convert/hashmap.h new file mode 100644 index 0000000000..a4cc63ad22 --- /dev/null +++ b/tp/Texinfo/XS/convert/hashmap.h @@ -0,0 +1,27 @@ +/* hashmap.h - declarations for hashmap.c */ +#ifndef HASHMAP_H +#define HASHMAP_H + +/* Copyright 2024 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +void init_registered_ids_c_hashmap (CONVERTER *self, int nbuckets); +int is_c_hashmap_registered_id (CONVERTER *self, const char *in_string); +void c_hashmap_register_id (CONVERTER *self, const char *in_string); +void clear_registered_ids_c_hashmap (CONVERTER *self); +void free_registered_ids_c_hashmap (CONVERTER *self); + + +#endif diff --git a/tp/Texinfo/XS/convert/html_converter_finish.c b/tp/Texinfo/XS/convert/html_converter_finish.c index 9ecbf80e63..d5bdf7eb33 100644 --- a/tp/Texinfo/XS/convert/html_converter_finish.c +++ b/tp/Texinfo/XS/convert/html_converter_finish.c @@ -31,6 +31,7 @@ #include "api_to_perl.h" #include "call_html_perl_function.h" #include "call_html_cxx_function.h" +#include "hashmap.h" /* html_reset_translated_special_unit_info_tree html_clear_direction_string_type */ #include "convert_html.h" @@ -161,6 +162,8 @@ html_reset_converter (CONVERTER *self) if (self->ids_data_type == IDT_perl_hashmap) clear_registered_ids_hv (self); + else if (self->ids_data_type == IDT_hashmap) + clear_registered_ids_c_hashmap (self); #ifdef HAVE_CXX_HASHMAP else if (self->ids_data_type == IDT_cxx_hashmap) clear_registered_ids_hashmap (self); @@ -342,6 +345,8 @@ html_free_converter (CONVERTER *self) if (self->ids_data_type == IDT_perl_hashmap) free_registered_ids_hv (self); + else if (self->ids_data_type == IDT_hashmap) + free_registered_ids_c_hashmap (self); #ifdef HAVE_CXX_HASHMAP else if (self->ids_data_type == IDT_cxx_hashmap) free_registered_ids_hashmap (self); diff --git a/tp/Texinfo/XS/convert/html_prepare_converter.c b/tp/Texinfo/XS/convert/html_prepare_converter.c index 3d786e1c1f..20a7af47f9 100644 --- a/tp/Texinfo/XS/convert/html_prepare_converter.c +++ b/tp/Texinfo/XS/convert/html_prepare_converter.c @@ -55,6 +55,7 @@ #include "converter.h" #include "call_html_perl_function.h" #include "call_html_cxx_function.h" +#include "hashmap.h" #include "format_html.h" /* html_complete_no_arg_commands_formatting html_run_stage_handlers html_add_to_files_source_info html_find_file_source_info @@ -1721,6 +1722,14 @@ html_converter_customize (CONVERTER *self) if (self->ids_data_type == IDT_perl_hashmap) init_registered_ids_hv (self); + else if (self->ids_data_type == IDT_hashmap) + { + int predicted_values = 0; + if (self->document) + self->document->global_commands.footnotes.number;; + init_registered_ids_c_hashmap (self, predicted_values); + printf ("init hash %d bins\n", predicted_values); + } #ifdef HAVE_CXX_HASHMAP else if (self->ids_data_type == IDT_cxx_hashmap) init_registered_ids_hashmap (self); @@ -3768,6 +3777,8 @@ html_id_is_registered (CONVERTER *self, const char *string) { if (self->ids_data_type == IDT_perl_hashmap) return is_hv_registered_id (self, string); + else if (self->ids_data_type == IDT_hashmap) + return is_c_hashmap_registered_id (self, string); #ifdef HAVE_CXX_HASHMAP else if (self->ids_data_type == IDT_cxx_hashmap) return is_hashmap_registered_id (self, string); @@ -3781,6 +3792,8 @@ html_register_id (CONVERTER *self, const char *string) { if (self->ids_data_type == IDT_perl_hashmap) hv_register_id (self, string); + else if (self->ids_data_type == IDT_hashmap) + c_hashmap_register_id (self, string); #ifdef HAVE_CXX_HASHMAP else if (self->ids_data_type == IDT_cxx_hashmap) hashmap_register_id (self, string); diff --git a/tp/Texinfo/XS/main/converter_types.h b/tp/Texinfo/XS/main/converter_types.h index 16a14328ee..cca8cf20b9 100644 --- a/tp/Texinfo/XS/main/converter_types.h +++ b/tp/Texinfo/XS/main/converter_types.h @@ -42,12 +42,14 @@ enum ids_data_type { IDT_perl_hashmap, IDT_cxx_hashmap, IDT_string_list, + IDT_hashmap, }; /* converter low level customization */ #define CONVF_perl_hashmap 0x0001 #define CONVF_string_list 0x0002 #define CONVF_cxx_hashmap 0x0004 +#define CONVF_hashmap 0x0008 /* for string information passing to/from perl */ enum sv_string_type { @@ -883,6 +885,7 @@ typedef struct CONVERTER { STRING_LIST *registered_ids; /* actually HV * but we do not want to drag in Perl headers */ void *registered_ids_hv; + void *registered_ids_c_hashmap; #ifdef HAVE_CXX_HASHMAP /* a pointer on C++ data */ void *registered_ids_hashmap; diff --git a/tp/Texinfo/XS/teximakehtml.c b/tp/Texinfo/XS/teximakehtml.c index 16029d4bd8..08d0185ef8 100644 --- a/tp/Texinfo/XS/teximakehtml.c +++ b/tp/Texinfo/XS/teximakehtml.c @@ -312,8 +312,7 @@ main (int argc, char *argv[]) program_file, &converter_texinfo_language_config_dirs, &convert_options, - /* default, use C++ hashmap if available */ - 0); + CONVF_hashmap); /* to test linear search CONVF_string_list); */