The unicode-font patch will need to retrieve the name of a given glyph. This is necessary because when the font is asked, for example, for the width of the glyph { 1234 } representing the Unicode character "u2219", the index 1234 alone is not enough information. Before the unicode-font patch, the font registered all possible Unicode characters in a big table before use; now this process will be lazy, i.e. the font instance does not learn about the Unicode character "u2219" until someone calls font::contains on the corresponding glyph.
So we need a backward mapping from glyph to name. Since the name takes up some memory, it is convenient to store each name only once, not twice per glyph. This is easy for src/roff/troff/input.cpp: the charinfo class contains already a pointer to the name. For src/libs/libgroff/nametoindex.cpp we can avoid an extra redundant storage of the name by observing that the hash table (mapping each name to an int[1] containing the index) already contains a copy of the name with indefinite extent. We only need to make the pointer to this copy accessible: a simple modification to the 'define' method of PTABLE and a variant of the 'lookup' method of PTABLE do this. This patch doesn't introduce any noticeable slowdown. 2006-02-07 Bruno Haible <[EMAIL PROTECTED]> New accessor method glyph_t::glyph_name(). * src/include/ptable.h (declare_ptable): Add a return value to the 'define' method, and declare a 'lookupassoc' method. (implement_ptable): Return the stored key in 'define'. Implement lookupassoc. * src/include/font.h (glyph_t): Add 'name' field. Add an argument to the constructor. (glyph_t::glyph_name): New method. * src/libs/libgroff/nametoindex.cpp (character_indexer): Change return type of methods and field member type to glyph_t. (character_indexer::character_indexer): Update. (character_indexer::ascii_char_index): Allocate a name for the glyph. Return a glyph_t with name. (character_indexer::numbered_char_index): Return a glyph_t without a name. (character_indexer::named_char_index): Return a glyph_t with a name. (font::number_to_index, font::name_to_index): Update. * src/roff/troff/input.cpp (charinfo::charinfo): Use the symbol as the glyph's name. diff -r -c3 groff-20060204.orig/src/include/ptable.h groff-20060204/src/include/ptable.h --- groff-20060204.orig/src/include/ptable.h 2006-01-26 16:58:44.000000000 +0100 +++ groff-20060204/src/include/ptable.h 2006-02-08 00:03:53.000000000 +0100 @@ -88,11 +88,20 @@ PTABLE(T)(); /* Create an empty table. 
*/ \ ~PTABLE(T)(); /* Delete a table, including its \ values. */ \ - void define(const char *, T *); /* Define the value (arg2) for a key \ - (arg1). */ \ + const char *define(const char *, T *);/* Define the value (arg2) for a key \ + (arg1). Return the copy in the \ + table of the key (arg1), or \ + possibly NULL if the value (arg2) \ + is NULL. */ \ T *lookup(const char *); /* Return a pointer to the value of \ the given key, if found in the \ table, or NULL otherwise. */ \ + T *lookupassoc(const char **); /* Return a pointer to the value of \ + the given key, passed by reference,\ + and replace the key argument with \ + the copy found in the table, if \ + the key is found in the table. \ + Return NULL otherwise. */ \ friend class PTABLE_ITERATOR(T); \ }; @@ -125,7 +134,7 @@ a_delete v; \ } \ \ -void PTABLE(T)::define(const char *key, T *val) \ +const char *PTABLE(T)::define(const char *key, T *val) \ { \ assert(key != 0); \ unsigned long h = hash_string(key); \ @@ -136,10 +145,10 @@ if (strcmp(v[n].key, key) == 0) { \ a_delete v[n].val; \ v[n].val = val; \ - return; \ + return v[n].key; \ } \ if (val == 0) \ - return; \ + return 0; \ if (used*FULL_DEN >= size*FULL_NUM) { \ PASSOC(T) *oldv = v; \ unsigned old_size = size; \ @@ -170,6 +179,7 @@ v[n].key = temp; \ v[n].val = val; \ used++; \ + return temp; \ } \ \ T *PTABLE(T)::lookup(const char *key) \ @@ -183,6 +193,20 @@ return 0; \ } \ \ +T *PTABLE(T)::lookupassoc(const char **keyptr) \ +{ \ + const char *key = *keyptr; \ + assert(key != 0); \ + for (unsigned n = unsigned(hash_string(key) % size); \ + v[n].key != 0; \ + n = (n == 0 ? 
size - 1 : n - 1)) \ + if (strcmp(v[n].key, key) == 0) { \ + *keyptr = v[n].key; \ + return v[n].val; \ + } \ + return 0; \ +} \ + \ PTABLE_ITERATOR(T)::PTABLE_ITERATOR(T)(PTABLE(T) *t) \ : p(t), i(0) \ { \ diff -r -c3 groff-20060204.orig/src/include/font.h groff-20060204/src/include/font.h --- groff-20060204.orig/src/include/font.h 2006-02-06 02:34:12.000000000 +0100 +++ groff-20060204/src/include/font.h 2006-02-07 23:33:31.000000000 +0100 @@ -29,33 +29,42 @@ // A glyph is represented by a font-independent glyph_t object. // The functions font::name_to_index and font::number_to_index return such // an object. +// There are two types of glyphs: +// - those with a name, and among these in particular: +// "charNNN" denoting a single 'char' in the input character set, +// "uXXXX" denoting a Unicode character, +// - those with a number, referring to the the font-dependent glyph with +// the given number. struct glyph_t { private: int index; // A font-independent integer value. + const char *name; // Glyph name, statically allocated. friend class font; + friend class character_indexer; friend class charinfo; - glyph_t(int); // Glyph with given index. + glyph_t(int, const char *); // Glyph with given index and name. public: glyph_t(); // Uninitialized glyph. static glyph_t undefined_glyph(); // Undefined glyph. 
int glyph_index(); + const char *glyph_name(); int operator==(const glyph_t&) const; int operator!=(const glyph_t&) const; }; -inline glyph_t::glyph_t(int idx) -: index (idx) +inline glyph_t::glyph_t(int idx, const char *nm) +: index (idx), name (nm) { } inline glyph_t::glyph_t() -: index (0xdeadbeef) +: index (0xdeadbeef), name (NULL) { } inline glyph_t glyph_t::undefined_glyph() { - return glyph_t(-1); + return glyph_t(-1, NULL); } #define UNDEFINED_GLYPH glyph_t::undefined_glyph() @@ -64,6 +73,11 @@ return index; } +inline const char *glyph_t::glyph_name() +{ + return name; +} + inline int glyph_t::operator==(const glyph_t &other) const { return index == other.index; diff -r -c3 groff-20060204.orig/src/libs/libgroff/nametoindex.cpp groff-20060204/src/libs/libgroff/nametoindex.cpp --- groff-20060204.orig/src/libs/libgroff/nametoindex.cpp 2006-02-06 02:34:13.000000000 +0100 +++ groff-20060204/src/libs/libgroff/nametoindex.cpp 2006-02-08 00:07:53.000000000 +0100 @@ -36,14 +36,14 @@ public: character_indexer(); ~character_indexer(); - int ascii_char_index(unsigned char); - int named_char_index(const char *); - int numbered_char_index(int); + glyph_t ascii_char_index(unsigned char); + glyph_t named_char_index(const char *); + glyph_t numbered_char_index(int); private: enum { NSMALL = 256 }; int next_index; - int ascii_index[256]; - int small_number_index[NSMALL]; + glyph_t ascii_index[256]; + glyph_t small_number_index[NSMALL]; PTABLE(int) table; }; @@ -52,66 +52,76 @@ { int i; for (i = 0; i < 256; i++) - ascii_index[i] = -1; + ascii_index[i] = glyph_t(-1, NULL); for (i = 0; i < NSMALL; i++) - small_number_index[i] = -1; + small_number_index[i] = glyph_t(-1, NULL); } character_indexer::~character_indexer() { } -int character_indexer::ascii_char_index(unsigned char c) +glyph_t character_indexer::ascii_char_index(unsigned char c) { - if (ascii_index[c] < 0) - ascii_index[c] = next_index++; + if (ascii_index[c].index < 0) { + char buf[4+3+1]; + memcpy(buf, "char", 4); + 
strcpy(buf + 4, i_to_a(c)); + ascii_index[c] = glyph_t(next_index++, strsave(buf)); + } return ascii_index[c]; } -int character_indexer::numbered_char_index(int n) +glyph_t character_indexer::numbered_char_index(int n) { if (n >= 0 && n < NSMALL) { - if (small_number_index[n] < 0) - small_number_index[n] = next_index++; + if (small_number_index[n].index < 0) + small_number_index[n] = glyph_t(next_index++, NULL); return small_number_index[n]; } // Not the most efficient possible implementation. - char buf[INT_DIGITS + 3]; + char buf[1 + 1 + INT_DIGITS + 1]; buf[0] = ' '; strcpy(buf + 1, i_to_a(n)); - return named_char_index(buf); + int *np = table.lookup(buf); + if (!np) { + np = new int[1]; + *np = next_index++; + table.define(buf, np); + } + return glyph_t(*np, NULL); } -int character_indexer::named_char_index(const char *s) +glyph_t character_indexer::named_char_index(const char *s) { - int *np = table.lookup(s); + int *np = table.lookupassoc(&s); if (!np) { np = new int[1]; *np = next_index++; - table.define(s, np); + s = table.define(s, np); } - return *np; + return glyph_t(*np, s); } static character_indexer indexer; glyph_t font::number_to_index(int n) { - return glyph_t(indexer.numbered_char_index(n)); + return indexer.numbered_char_index(n); } glyph_t font::name_to_index(const char *s) { assert(s != 0 && s[0] != '\0' && s[0] != ' '); if (s[1] == '\0') - return glyph_t(indexer.ascii_char_index(s[0])); + return indexer.ascii_char_index(s[0]); /* char128 and \200 are synonyms */ if (s[0] == 'c' && s[1] == 'h' && s[2] == 'a' && s[3] == 'r') { char *val; long n = strtol(s + 4, &val, 10); if (val != s + 4 && *val == '\0' && n >= 0 && n < 256) - return glyph_t(indexer.ascii_char_index((unsigned char)n)); + return indexer.ascii_char_index((unsigned char)n); } - return glyph_t(indexer.named_char_index(s)); + return indexer.named_char_index(s); } diff -r -c3 groff-20060204.orig/src/roff/troff/input.cpp groff-20060204/src/roff/troff/input.cpp --- 
groff-20060204.orig/src/roff/troff/input.cpp 2006-02-06 02:34:13.000000000 +0100 +++ groff-20060204/src/roff/troff/input.cpp 2006-02-08 00:09:12.000000000 +0100 @@ -8093,7 +8093,7 @@ not_found(0), transparent_translate(1), translate_input(0), mode(CHAR_NORMAL), nm(s) { - index = glyph_t(next_index++); + index = glyph_t(next_index++, s.contents()); } void charinfo::set_hyphenation_code(unsigned char c) _______________________________________________ Groff mailing list Groff@gnu.org http://lists.gnu.org/mailman/listinfo/groff