Hi all,

Typically, we write hex-byte dump values to the 'access-log' if messages are not printable in the LATIN1 encoding.

To support printable Unicode (UTF-8) characters in the current locale of the system, please find attached a patchset that allows you to set:

  group = core
  ...
  access-log-utf8 = yes

which then enables dumping UTF-8 payloads as wide character (wchar_t) strings in the code.

Please review and test; comments are welcome, as always.

Thanks,
Stipe

--
Best Regards,
Stipe Tolj

-------------------------------------------------------------------
Düsseldorf, NRW, Germany

Kannel Foundation                 tolj.org system architecture
http://www.kannel.org/            http://www.tolj.org/

[email protected]                  [email protected]
-------------------------------------------------------------------
Index: doc/userguide/userguide.xml
===================================================================
--- doc/userguide/userguide.xml (revision 5335)
+++ doc/userguide/userguide.xml (working copy)
@@ -1354,6 +1354,13 @@
                  logging format is desired without a prefixed default 
timestamp. 
      </entry></row>
 
+    <row><entry><literal>access-log-utf8</literal></entry>
+     <entry>boolean</entry>
+     <entry valign="bottom">
+        Indicates whether the <literal>access-log</literal> will contain message
+        payloads converted to their UTF-8 representation.
+     </entry></row>
+
     <row><entry><literal>access-log-format</literal></entry>
      <entry>string</entry>
      <entry valign="bottom">
Index: gw/bb_alog.c
===================================================================
--- gw/bb_alog.c        (revision 5335)
+++ gw/bb_alog.c        (working copy)
@@ -61,6 +61,9 @@
  * Alexander Malysh <amalysh at kannel dot org>
  */
 
+#include <wchar.h>
+#include <wctype.h>
+
 #include "gwlib/gwlib.h"
 #include "msg.h"
 #include "sms.h"
@@ -68,8 +71,12 @@
 #include "smscconn.h"
 
 static Octstr *custom_log_format = NULL;
+static int use_wchar = 0;
 
+/* Function pointer to the specific implementation */
+static void (*alog_sms_func)(SMSCConn *conn, Msg *msg, const char *message);
 
+
 /********************************************************************
  * Routine to escape the values into the custom log format.
  *
@@ -128,12 +135,27 @@
  
     text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) : 
octstr_create("");
     udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) : 
octstr_create("");
-    if ((msg->sms.coding == DC_8BIT || msg->sms.coding == DC_UCS2))
-        octstr_binary_to_hex(text, 1);
-    else
-        octstr_convert_printable(text);
     octstr_binary_to_hex(udh, 1);
 
+    /*
+     * Transform text based on encoding and wide char usage.
+     */
+    if (msg->sms.coding == DC_8BIT) {
+        octstr_binary_to_hex(text, 1);
+    }
+    else if (!use_wchar) {
+        if (msg->sms.coding == DC_UCS2) {
+            octstr_binary_to_hex(text, 1);
+        } else {
+            octstr_convert_printable(text);
+        }
+    }
+    else if (msg->sms.coding == DC_UCS2) {
+        if (charset_convert(text, "UTF-16BE", "UTF-8") < 0) {
+            error(0, "Failed to convert msgdata from charset UCS-2 to UTF-8, 
will leave as is.");
+        }
+    }
+
     if (octstr_len(text)) {
         word_list = octstr_split_words(text);
         num_words = gwlist_len(word_list);
@@ -221,7 +243,11 @@
                 break;
 
             case 'L':
-                octstr_append_decimal(result, octstr_len(msg->sms.msgdata));
+                if (use_wchar) {
+                    octstr_append_decimal(result, octstr_len_wcstr(text));
+                } else {
+                    octstr_append_decimal(result, 
octstr_len(msg->sms.msgdata));
+                }
                 break;
 
             case 't':
@@ -353,53 +379,97 @@
 
 
 /********************************************************************
- * 
+ * Specific implementation
  */
 
-void bb_alog_init(const Octstr *format)
+static void alog_sms(SMSCConn *conn, Msg *msg, const char *message)
 {
-    gw_assert(format != NULL);
+    Octstr *text = NULL;
+    Octstr *temp;
+    Octstr *udh;
+    const Octstr *cid;
 
-    custom_log_format = octstr_duplicate(format);
+    gw_assert(msg_type(msg) == sms);
+
+    text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) : 
octstr_create("");
+    udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) : 
octstr_create("");
+
+    if (conn && smscconn_id(conn))
+        cid = smscconn_id(conn);
+    else if (conn && smscconn_name(conn))
+        cid = smscconn_name(conn);
+    else if (msg->sms.smsc_id)
+        cid = msg->sms.smsc_id;
+    else
+        cid = octstr_imm("");
+
+    if ((msg->sms.coding == DC_8BIT || msg->sms.coding == DC_UCS2))
+        octstr_binary_to_hex(text, 1);
+    else
+        octstr_convert_printable(text);
+    octstr_binary_to_hex(udh, 1);
+
+    alog("%s [SMSC:%s] [SVC:%s] [ACT:%s] [BINF:%s] [FID:%s] [META:%s] 
[from:%s] [to:%s] [flags:%ld:%ld:%ld:%ld:%ld] "
+         "[msg:%ld:%s] [udh:%ld:%s]",
+         message,
+         octstr_get_cstr(cid),
+         msg->sms.service ? octstr_get_cstr(msg->sms.service) : "",
+         msg->sms.account ? octstr_get_cstr(msg->sms.account) : "",
+         msg->sms.binfo ? octstr_get_cstr(msg->sms.binfo) : "",
+         msg->sms.foreign_id ? octstr_get_cstr(msg->sms.foreign_id) : "",
+         msg->sms.meta_data ? octstr_get_cstr(msg->sms.meta_data) : "",
+         msg->sms.sender ? octstr_get_cstr(msg->sms.sender) : "",
+         msg->sms.receiver ? octstr_get_cstr(msg->sms.receiver) : "",
+         msg->sms.mclass, msg->sms.coding, msg->sms.mwi, msg->sms.compress,
+         msg->sms.dlr_mask,
+         octstr_len(msg->sms.msgdata), octstr_get_cstr(text),
+         octstr_len(msg->sms.udhdata), octstr_get_cstr(udh)
+    );
+
+    octstr_destroy(udh);
+    octstr_destroy(text);
 }
 
 
-void bb_alog_shutdown(void)
+static void alog_sms_custom(SMSCConn *conn, Msg *msg, const char *message)
 {
-    octstr_destroy(custom_log_format);
-    custom_log_format = NULL;
+    Octstr *text = NULL;
+
+    gw_assert(msg_type(msg) == sms);
+
+    text = get_pattern(conn, msg, message);
+    alog("%s", octstr_get_cstr(text));
+    octstr_destroy(text);
 }
 
 
-void bb_alog_sms(SMSCConn *conn, Msg *msg, const char *message)
+static void alog_sms_wide(SMSCConn *conn, Msg *msg, const char *message)
 {
     Octstr *text = NULL;
-    
+    Octstr *temp;
+    wchar_t *wcs = NULL;
+    Octstr *udh;
+    const Octstr *cid;
+    size_t wcsl;
+
     gw_assert(msg_type(msg) == sms);
 
-    /* if we don't have any custom log, then use our "default" one */
-    
-    if (custom_log_format == NULL) {
-        Octstr *udh;
-        const Octstr *cid;
+    text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) : 
octstr_create("");
+    udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) : 
octstr_create("");
 
-        text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) : 
octstr_create("");
-        udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) : 
octstr_create("");
+    if (conn && smscconn_id(conn))
+        cid = smscconn_id(conn);
+    else if (conn && smscconn_name(conn))
+        cid = smscconn_name(conn);
+    else if (msg->sms.smsc_id)
+        cid = msg->sms.smsc_id;
+    else
+        cid = octstr_imm("");
 
-        if (conn && smscconn_id(conn))
-            cid = smscconn_id(conn);
-        else if (conn && smscconn_name(conn))
-            cid = smscconn_name(conn);
-        else if (msg->sms.smsc_id)
-            cid = msg->sms.smsc_id;
-        else
-            cid = octstr_imm("");
+    octstr_binary_to_hex(udh, 1);
 
-        if ((msg->sms.coding == DC_8BIT || msg->sms.coding == DC_UCS2))
-            octstr_binary_to_hex(text, 1);
-        else
-            octstr_convert_printable(text);
-        octstr_binary_to_hex(udh, 1);
+    if (msg->sms.coding == DC_8BIT || octstr_len(text) == 0) {
+        octstr_binary_to_hex(text, 1);
 
         alog("%s [SMSC:%s] [SVC:%s] [ACT:%s] [BINF:%s] [FID:%s] [META:%s] 
[from:%s] [to:%s] [flags:%ld:%ld:%ld:%ld:%ld] "
              "[msg:%ld:%s] [udh:%ld:%s]",
@@ -417,14 +487,90 @@
              octstr_len(msg->sms.msgdata), octstr_get_cstr(text),
              octstr_len(msg->sms.udhdata), octstr_get_cstr(udh)
         );
+    } else {
 
-        octstr_destroy(udh);
-    } else {
-        text = get_pattern(conn, msg, message);
-        alog("%s", octstr_get_cstr(text));
+        if (msg->sms.coding == DC_UCS2) {
+            if (charset_convert(text, "UTF-16BE", "UTF-8") < 0) {
+                error(0, "Failed to convert msgdata from charset UCS-2 to 
UTF-8, will leave as is.");
+            }
+        }
+
+        if ((wcs = octstr_get_wcstr(text)) != NULL) {
+            octstr_convert_wchar_printable(wcs);
+            wcsl = wcslen(wcs);
+        } else {
+            wcsl = 0;
+            wcs = L"(null)";
+        }
+
+        alog("%s [SMSC:%s] [SVC:%s] [ACT:%s] [BINF:%s] [FID:%s] [META:%s] 
[from:%s] [to:%s] [flags:%ld:%ld:%ld:%ld:%ld] "
+             "[msg:%zu:%ls] [udh:%ld:%s]",
+             message,
+             octstr_get_cstr(cid),
+             msg->sms.service ? octstr_get_cstr(msg->sms.service) : "",
+             msg->sms.account ? octstr_get_cstr(msg->sms.account) : "",
+             msg->sms.binfo ? octstr_get_cstr(msg->sms.binfo) : "",
+             msg->sms.foreign_id ? octstr_get_cstr(msg->sms.foreign_id) : "",
+             msg->sms.meta_data ? octstr_get_cstr(msg->sms.meta_data) : "",
+             msg->sms.sender ? octstr_get_cstr(msg->sms.sender) : "",
+             msg->sms.receiver ? octstr_get_cstr(msg->sms.receiver) : "",
+             msg->sms.mclass, msg->sms.coding, msg->sms.mwi, msg->sms.compress,
+             msg->sms.dlr_mask,
+             wcsl, wcs,
+             octstr_len(msg->sms.udhdata), octstr_get_cstr(udh)
+        );
+
+        gw_free(wcs);
     }
 
+    octstr_destroy(udh);
     octstr_destroy(text);
 }
 
 
+static void alog_sms_custom_wide(SMSCConn *conn, Msg *msg, const char *message)
+{
+    Octstr *text = NULL;
+    wchar_t *wcs = NULL;
+
+    gw_assert(msg_type(msg) == sms);
+
+    text = get_pattern(conn, msg, message);
+
+    wcs = octstr_get_wcstr(text);
+    octstr_convert_wchar_printable(wcs);
+    alog("%ls", wcs);
+    gw_free(wcs);
+
+    octstr_destroy(text);
+}
+
+
+/********************************************************************
+ * Public functions
+ */
+
+void bb_alog_init(const Octstr *format, int wchar)
+{
+    use_wchar = wchar;
+    if (format != NULL) {
+        custom_log_format = octstr_duplicate(format);
+        alog_sms_func = (use_wchar ? alog_sms_custom_wide : alog_sms_custom);
+    } else {
+        custom_log_format = NULL;
+        alog_sms_func = (use_wchar ? alog_sms_wide : alog_sms);
+    }
+}
+
+
+void bb_alog_shutdown(void)
+{
+    octstr_destroy(custom_log_format);
+    custom_log_format = NULL;
+}
+
+
+void bb_alog_sms(SMSCConn *conn, Msg *msg, const char *message)
+{
+    alog_sms_func(conn, msg, message);
+}
Index: gw/bearerbox.c
===================================================================
--- gw/bearerbox.c      (revision 5335)
+++ gw/bearerbox.c      (working copy)
@@ -377,7 +377,7 @@
     CfgGroup *grp;
     Octstr *log, *val;
     long loglevel, store_dump_freq, value;
-    int lf, m;
+    int lf, m, log_utf8;
 #ifdef HAVE_LIBSSL
     Octstr *ssl_server_cert_file;
     Octstr *ssl_server_key_file;
@@ -393,6 +393,7 @@
 
     /* defaults: use localtime and markers for access-log */
     lf = m = 1;
+       log_utf8 = 0;
        
     grp = cfg_get_single_group(cfg, octstr_imm("core"));
 
@@ -432,10 +433,15 @@
     /* should predefined markers be used, ie. prefixing timestamp */
     cfg_get_bool(&m, grp, octstr_imm("access-log-clean"));
 
+    /* access-log with wchar support */
+    cfg_get_bool(&log_utf8, grp, octstr_imm("access-log-utf8"));
+
     /* custom access-log format  */
     if ((log = cfg_get(grp, octstr_imm("access-log-format"))) != NULL) {
-        bb_alog_init(log);
+        bb_alog_init(log, log_utf8);
         octstr_destroy(log);
+    } else {
+        bb_alog_init(log, log_utf8);
     }
 
     /* open access-log file */
Index: gw/bearerbox.h
===================================================================
--- gw/bearerbox.h      (revision 5335)
+++ gw/bearerbox.h      (working copy)
@@ -193,7 +193,7 @@
  */
 
 /* passes the access-log-format string from config to the module */
-void bb_alog_init(const Octstr *format);
+void bb_alog_init(const Octstr *format, int wchar);
 
 /* cleanup for internal things */
 void bb_alog_shutdown(void);
Index: gwlib/cfg.def
===================================================================
--- gwlib/cfg.def       (revision 5335)
+++ gwlib/cfg.def       (working copy)
@@ -101,6 +101,7 @@
     OCTSTR(access-log-time)
     OCTSTR(access-log-format)
     OCTSTR(access-log-clean)
+    OCTSTR(access-log-utf8)
     OCTSTR(store-file)
     OCTSTR(store-dump-freq)
     OCTSTR(store-type)
Index: gwlib/octstr.c
===================================================================
--- gwlib/octstr.c      (revision 5335)
+++ gwlib/octstr.c      (working copy)
@@ -69,6 +69,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <locale.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
@@ -217,6 +218,7 @@
 
 void octstr_init(void)
 {
+    setlocale(LC_ALL, "");
     urlcode_init();
     mutex_init_static(&immutables_mutex);
     immutables_init = 1;
@@ -348,6 +350,16 @@
 }
 
 
+size_t octstr_len_wcstr(const Octstr *ostr)
+{
+    if (ostr == NULL)
+        return 0;
+    seems_valid(ostr);
+
+    return mbstowcs(NULL, ostr->data, 0);
+}
+
+
 Octstr *octstr_copy_real(const Octstr *ostr, long from, long len, const char 
*file, long line,
                          const char *func)
 {
@@ -448,6 +460,36 @@
 }
 
 
+wchar_t *octstr_get_wcstr_real(const Octstr *ostr, const char *file, long line,
+                               const char *func)
+{
+    size_t mbslen;
+    wchar_t *wcs;
+
+    if (!ostr)
+        return L"(null)";
+    seems_valid(ostr);
+    if (ostr->len == 0)
+        return L"";
+
+    mbslen = mbstowcs(NULL, ostr->data, 0);
+    if (mbslen == (size_t) -1) {
+        return NULL;
+    }
+
+    wcs = gw_calloc(mbslen + 1, sizeof(*wcs));
+    if (wcs == NULL)
+        return NULL;
+
+    if (mbstowcs(wcs, ostr->data, mbslen + 1) == (size_t) -1) {
+        gw_free(wcs);
+        return NULL;
+    }
+
+    return wcs;
+}
+
+
 void octstr_append_from_hex(Octstr *ostr, char *hex)
 {
     Octstr *output;
@@ -867,7 +909,23 @@
 }
 
 
+void octstr_convert_wchar_printable(wchar_t *wcs)
+{
+    size_t pos;
+    size_t end;
 
+    if (wcs == NULL)
+        return;
+
+    end = wcslen(wcs);
+
+    for (pos = 0; pos < end; pos++) {
+        if (!iswprint(wcs[pos]))
+            wcs[pos] = L'.';
+    }
+}
+
+
 int octstr_compare(const Octstr *ostr1, const Octstr *ostr2)
 {
     int ret;
Index: gwlib/octstr.h
===================================================================
--- gwlib/octstr.h      (revision 5335)
+++ gwlib/octstr.h      (working copy)
@@ -98,6 +98,7 @@
 
 #include <stdio.h>
 #include <stdarg.h>
+#include <wchar.h>
 
 #include "list.h"
 
@@ -235,6 +236,27 @@
 
 
 /*
+ * Return pointer to content of octet string as a NUL-terminated wide C
+ * character string, which supports multi-byte characters in the current
+ * locale.
+ *
+ * If the octet string is empty, an empty wide C string is returned. If
+ * the conversion failed, NULL is returned.
+ */
+wchar_t *octstr_get_wcstr_real(const Octstr *ostr, const char *file, long line,
+                               const char *func);
+#define octstr_get_wcstr(ostr) \
+    (octstr_get_wcstr_real(ostr, __FILE__, __LINE__, __func__))
+
+
+/*
+ * Return the length (number of characters) of a wide C string
+ * represented by the octet object string.
+ */
+size_t octstr_len_wcstr(const Octstr *ostr);
+
+
+/*
  * Append characters from printable hexadecimal format at the tail of 
  * an octet string. "78797a" or "78797A" would be converted to "xyz"
  * and then appended.
@@ -313,6 +335,12 @@
 
 
 /*
+ * Same as above, for wide char type.
+ */
+void octstr_convert_wchar_printable(wchar_t *wcs);
+
+
+/*
  * Compare two octet strings, returning 0 if they are equal, negative if
  * `ostr1' is less than `ostr2' (when compared octet-value by octet-value),
  * and positive if greater.

Reply via email to