On Wed, Dec 14, 2011 at 02:02:33PM +0400, Alexander V. Lukyanov wrote:
> This means it is possible to avoid calling is_cjk_encoding() at all,
> because uc_width only uses the encoding for the CJK check, and uc_width is
> only called by wcwidth in the UTF-8 case (UTF-8 is not a CJK encoding).

Here is a patch for this. Not a big optimization, though.

-- 
   Alexander.
diff --git a/lib/uniwidth.in.h b/lib/uniwidth.in.h
index e806744..0af2359 100644
--- a/lib/uniwidth.in.h
+++ b/lib/uniwidth.in.h
@@ -44,6 +44,13 @@ extern int
 #endif
        ;
 
+extern int
+       uc_width1 (ucs4_t uc)
+#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+       __attribute__ ((__pure__))
+#endif
+       ;
+
 /* Determine number of column positions required for first N units
    (or fewer if S ends before this) in S.  */
 extern int
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c
index a314e71..00c7789 100644
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -312,7 +312,7 @@ static const signed char nonspacing_table_ind[240] = {
 
 /* Determine number of column positions required for UC.  */
 int
-uc_width (ucs4_t uc, const char *encoding)
+uc_width1 (ucs4_t uc)
 {
   /* Test for non-spacing or control character.  */
   if ((uc >> 9) < 240)
@@ -359,10 +359,17 @@ uc_width (ucs4_t uc, const char *encoding)
           || (uc >= 0x30000 && uc <= 0x3ffff) /* Tertiary Ideographic Plane */
      )   )
     return 2;
+  return 1;
+}
+
+int
+uc_width (ucs4_t uc, const char *encoding)
+{
+  int w = uc_width1 (uc);
   /* In ancient CJK encodings, Cyrillic and most other characters are
      double-width as well.  */
   if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
-      && is_cjk_encoding (encoding))
+      && w == 1 && is_cjk_encoding (encoding))
     return 2;
-  return 1;
+  return w;
 }
diff --git a/lib/wcwidth.c b/lib/wcwidth.c
index a006ca7..1b5fb36 100644
--- a/lib/wcwidth.c
+++ b/lib/wcwidth.c
@@ -36,7 +36,7 @@ wcwidth (wchar_t wc)
     {
       /* We assume that in a UTF-8 locale, a wide character is the same as a
          Unicode character.  */
-      return uc_width (wc, encoding);
+      return uc_width1 (wc);
     }
   else
     {

Reply via email to