Hi all,
typically we use hex-byte dump values if messages are not printable in
the LATIN1 encoding in the 'access-log'.
To support Unicode and UTF-8 printable characters in the current locale
of the system, please find attached a patchset that allows you to set:
group = core
...
access-log-utf8 = yes
which then allows dumping of UTF-8 as wide char (wchar) types in the code.
Please review and test, comments as always welcome.
Thanks,
Stipe
--
Best Regards,
Stipe Tolj
-------------------------------------------------------------------
Düsseldorf, NRW, Germany
Kannel Foundation tolj.org system architecture
http://www.kannel.org/ http://www.tolj.org/
[email protected] [email protected]
-------------------------------------------------------------------
Index: doc/userguide/userguide.xml
===================================================================
--- doc/userguide/userguide.xml (revision 5335)
+++ doc/userguide/userguide.xml (working copy)
@@ -1354,6 +1354,13 @@
logging format is desired without a prefixed default
timestamp.
</entry></row>
+ <row><entry><literal>access-log-utf8</literal></entry>
+ <entry>boolean</entry>
+ <entry valign="bottom">
+ Indicates whether text message payloads are converted to their UTF-8
+ representation before being written to the <literal>access-log</literal>.
+ </entry></row>
+
<row><entry><literal>access-log-format</literal></entry>
<entry>string</entry>
<entry valign="bottom">
Index: gw/bb_alog.c
===================================================================
--- gw/bb_alog.c (revision 5335)
+++ gw/bb_alog.c (working copy)
@@ -61,6 +61,9 @@
* Alexander Malysh <amalysh at kannel dot org>
*/
+#include <wchar.h>
+#include <wctype.h>
+
#include "gwlib/gwlib.h"
#include "msg.h"
#include "sms.h"
@@ -68,8 +71,12 @@
#include "smscconn.h"
static Octstr *custom_log_format = NULL;
+/* Non-zero when bb_alog_init() was asked for wide character output. */
+static int use_wchar = 0;
+/*
+ * Function pointer to the specific implementation; assigned by
+ * bb_alog_init() and called by bb_alog_sms().
+ */
+static void (*alog_sms_func)(SMSCConn *conn, Msg *msg, const char *message);
+
/********************************************************************
* Routine to escape the values into the custom log format.
*
@@ -128,12 +135,27 @@
text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) :
octstr_create("");
udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) :
octstr_create("");
- if ((msg->sms.coding == DC_8BIT || msg->sms.coding == DC_UCS2))
- octstr_binary_to_hex(text, 1);
- else
- octstr_convert_printable(text);
octstr_binary_to_hex(udh, 1);
+ /*
+ * Transform text based on encoding and wide char usage.
+ */
+ if (msg->sms.coding == DC_8BIT) {
+ octstr_binary_to_hex(text, 1);
+ }
+ else if (!use_wchar) {
+ if (msg->sms.coding == DC_UCS2) {
+ octstr_binary_to_hex(text, 1);
+ } else {
+ octstr_convert_printable(text);
+ }
+ }
+ else if (msg->sms.coding == DC_UCS2) {
+ if (charset_convert(text, "UTF-16BE", "UTF-8") < 0) {
+ error(0, "Failed to convert msgdata from charset UCS-2 to UTF-8,
will leave as is.");
+ }
+ }
+
if (octstr_len(text)) {
word_list = octstr_split_words(text);
num_words = gwlist_len(word_list);
@@ -221,7 +243,11 @@
break;
case 'L':
- octstr_append_decimal(result, octstr_len(msg->sms.msgdata));
+ if (use_wchar) {
+ octstr_append_decimal(result, octstr_len_wcstr(text));
+ } else {
+ octstr_append_decimal(result,
octstr_len(msg->sms.msgdata));
+ }
break;
case 't':
@@ -353,53 +379,97 @@
/********************************************************************
- *
+ * Specific implementation
*/
-void bb_alog_init(const Octstr *format)
+/*
+ * Default access-log line without wide character support: the payload
+ * is hex-dumped for 8-bit and UCS-2 coded messages and reduced to
+ * printable characters otherwise; the UDH is always hex-dumped.
+ */
+static void alog_sms(SMSCConn *conn, Msg *msg, const char *message)
{
- gw_assert(format != NULL);
+    Octstr *text = NULL;
+    Octstr *udh;
+    const Octstr *cid;
- custom_log_format = octstr_duplicate(format);
+    gw_assert(msg_type(msg) == sms);
+
+    text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) :
+                              octstr_create("");
+    udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) :
+                             octstr_create("");
+
+    /* use the first SMSC identifier that is available */
+    if (conn && smscconn_id(conn))
+        cid = smscconn_id(conn);
+    else if (conn && smscconn_name(conn))
+        cid = smscconn_name(conn);
+    else if (msg->sms.smsc_id)
+        cid = msg->sms.smsc_id;
+    else
+        cid = octstr_imm("");
+
+    if ((msg->sms.coding == DC_8BIT || msg->sms.coding == DC_UCS2))
+        octstr_binary_to_hex(text, 1);
+    else
+        octstr_convert_printable(text);
+    octstr_binary_to_hex(udh, 1);
+
+    alog("%s [SMSC:%s] [SVC:%s] [ACT:%s] [BINF:%s] [FID:%s] [META:%s] "
+         "[from:%s] [to:%s] [flags:%ld:%ld:%ld:%ld:%ld] "
+         "[msg:%ld:%s] [udh:%ld:%s]",
+         message,
+         octstr_get_cstr(cid),
+         msg->sms.service ? octstr_get_cstr(msg->sms.service) : "",
+         msg->sms.account ? octstr_get_cstr(msg->sms.account) : "",
+         msg->sms.binfo ? octstr_get_cstr(msg->sms.binfo) : "",
+         msg->sms.foreign_id ? octstr_get_cstr(msg->sms.foreign_id) : "",
+         msg->sms.meta_data ? octstr_get_cstr(msg->sms.meta_data) : "",
+         msg->sms.sender ? octstr_get_cstr(msg->sms.sender) : "",
+         msg->sms.receiver ? octstr_get_cstr(msg->sms.receiver) : "",
+         msg->sms.mclass, msg->sms.coding, msg->sms.mwi, msg->sms.compress,
+         msg->sms.dlr_mask,
+         octstr_len(msg->sms.msgdata), octstr_get_cstr(text),
+         octstr_len(msg->sms.udhdata), octstr_get_cstr(udh)
+    );
+
+    octstr_destroy(udh);
+    octstr_destroy(text);
}
-void bb_alog_shutdown(void)
+/*
+ * Log one SMS using the line that get_pattern() builds for it.
+ */
+static void alog_sms_custom(SMSCConn *conn, Msg *msg, const char *message)
{
- octstr_destroy(custom_log_format);
- custom_log_format = NULL;
+    Octstr *line;
+
+    gw_assert(msg_type(msg) == sms);
+
+    line = get_pattern(conn, msg, message);
+    alog("%s", octstr_get_cstr(line));
+    octstr_destroy(line);
}
-void bb_alog_sms(SMSCConn *conn, Msg *msg, const char *message)
+static void alog_sms_wide(SMSCConn *conn, Msg *msg, const char *message)
{
Octstr *text = NULL;
-
+ Octstr *temp;
+ wchar_t *wcs = NULL;
+ Octstr *udh;
+ const Octstr *cid;
+ size_t wcsl;
+
gw_assert(msg_type(msg) == sms);
- /* if we don't have any custom log, then use our "default" one */
-
- if (custom_log_format == NULL) {
- Octstr *udh;
- const Octstr *cid;
+ text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) :
octstr_create("");
+ udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) :
octstr_create("");
- text = msg->sms.msgdata ? octstr_duplicate(msg->sms.msgdata) :
octstr_create("");
- udh = msg->sms.udhdata ? octstr_duplicate(msg->sms.udhdata) :
octstr_create("");
+ if (conn && smscconn_id(conn))
+ cid = smscconn_id(conn);
+ else if (conn && smscconn_name(conn))
+ cid = smscconn_name(conn);
+ else if (msg->sms.smsc_id)
+ cid = msg->sms.smsc_id;
+ else
+ cid = octstr_imm("");
- if (conn && smscconn_id(conn))
- cid = smscconn_id(conn);
- else if (conn && smscconn_name(conn))
- cid = smscconn_name(conn);
- else if (msg->sms.smsc_id)
- cid = msg->sms.smsc_id;
- else
- cid = octstr_imm("");
+ octstr_binary_to_hex(udh, 1);
- if ((msg->sms.coding == DC_8BIT || msg->sms.coding == DC_UCS2))
- octstr_binary_to_hex(text, 1);
- else
- octstr_convert_printable(text);
- octstr_binary_to_hex(udh, 1);
+ if (msg->sms.coding == DC_8BIT || octstr_len(text) == 0) {
+ octstr_binary_to_hex(text, 1);
alog("%s [SMSC:%s] [SVC:%s] [ACT:%s] [BINF:%s] [FID:%s] [META:%s]
[from:%s] [to:%s] [flags:%ld:%ld:%ld:%ld:%ld] "
"[msg:%ld:%s] [udh:%ld:%s]",
@@ -417,14 +487,90 @@
octstr_len(msg->sms.msgdata), octstr_get_cstr(text),
octstr_len(msg->sms.udhdata), octstr_get_cstr(udh)
);
+ } else {
- octstr_destroy(udh);
- } else {
- text = get_pattern(conn, msg, message);
- alog("%s", octstr_get_cstr(text));
+ if (msg->sms.coding == DC_UCS2) {
+ if (charset_convert(text, "UTF-16BE", "UTF-8") < 0) {
+ error(0, "Failed to convert msgdata from charset UCS-2 to
UTF-8, will leave as is.");
+ }
+ }
+
+ if ((wcs = octstr_get_wcstr(text)) != NULL) {
+ octstr_convert_wchar_printable(wcs);
+ wcsl = wcslen(wcs);
+ } else {
+ wcsl = 0;
+ wcs = L"(null)";
+ }
+
+ alog("%s [SMSC:%s] [SVC:%s] [ACT:%s] [BINF:%s] [FID:%s] [META:%s]
[from:%s] [to:%s] [flags:%ld:%ld:%ld:%ld:%ld] "
+ "[msg:%zu:%ls] [udh:%ld:%s]",
+ message,
+ octstr_get_cstr(cid),
+ msg->sms.service ? octstr_get_cstr(msg->sms.service) : "",
+ msg->sms.account ? octstr_get_cstr(msg->sms.account) : "",
+ msg->sms.binfo ? octstr_get_cstr(msg->sms.binfo) : "",
+ msg->sms.foreign_id ? octstr_get_cstr(msg->sms.foreign_id) : "",
+ msg->sms.meta_data ? octstr_get_cstr(msg->sms.meta_data) : "",
+ msg->sms.sender ? octstr_get_cstr(msg->sms.sender) : "",
+ msg->sms.receiver ? octstr_get_cstr(msg->sms.receiver) : "",
+ msg->sms.mclass, msg->sms.coding, msg->sms.mwi, msg->sms.compress,
+ msg->sms.dlr_mask,
+ wcsl, wcs,
+ octstr_len(msg->sms.udhdata), octstr_get_cstr(udh)
+ );
+
+ gw_free(wcs);
}
+ octstr_destroy(udh);
octstr_destroy(text);
}
+/*
+ * Log one SMS using the line that get_pattern() builds for it, emitted
+ * as a wide character string. Wide characters that are not printable
+ * are replaced by '.'.
+ *
+ * Falls back to byte-oriented output when the line is empty or cannot
+ * be converted to wide characters.
+ */
+static void alog_sms_custom_wide(SMSCConn *conn, Msg *msg, const char *message)
+{
+    Octstr *text;
+    wchar_t *wcs = NULL;
+
+    gw_assert(msg_type(msg) == sms);
+
+    text = get_pattern(conn, msg, message);
+
+    /*
+     * An empty line needs no conversion; skipping it also guarantees
+     * that only a freshly allocated buffer is modified and handed to
+     * gw_free() below.
+     */
+    if (octstr_len(text) > 0)
+        wcs = octstr_get_wcstr(text);
+
+    if (wcs != NULL) {
+        octstr_convert_wchar_printable(wcs);
+        alog("%ls", wcs);
+        gw_free(wcs);
+    } else {
+        /* empty line or failed conversion: never pass NULL to %ls */
+        octstr_convert_printable(text);
+        alog("%s", octstr_get_cstr(text));
+    }
+
+    octstr_destroy(text);
+}
+
+
+/********************************************************************
+ * Public functions
+ */
+
+/*
+ * Configure the access-log module and select the logging implementation.
+ *
+ * format - custom access-log format, or NULL for the built-in default
+ *          layout. The string is duplicated; the caller keeps its own.
+ * wchar  - non-zero to emit log lines as wide character strings.
+ */
+void bb_alog_init(const Octstr *format, int wchar)
+{
+    Octstr *new_format = (format != NULL) ? octstr_duplicate(format) : NULL;
+
+    /* release a format left over from an earlier call instead of leaking it */
+    octstr_destroy(custom_log_format);
+    custom_log_format = new_format;
+
+    use_wchar = wchar;
+    if (format != NULL)
+        alog_sms_func = (use_wchar ? alog_sms_custom_wide : alog_sms_custom);
+    else
+        alog_sms_func = (use_wchar ? alog_sms_wide : alog_sms);
+}
+
+
+/*
+ * Release the custom access-log format, if any, and forget it.
+ */
+void bb_alog_shutdown(void)
+{
+    Octstr *old_format = custom_log_format;
+
+    custom_log_format = NULL;
+    octstr_destroy(old_format);
+}
+
+
+/*
+ * Write one access-log entry for `msg' through the implementation that
+ * bb_alog_init() selected. If bb_alog_init() has not been called, the
+ * default byte-oriented format is used instead of calling through a
+ * NULL function pointer.
+ */
+void bb_alog_sms(SMSCConn *conn, Msg *msg, const char *message)
+{
+    if (alog_sms_func == NULL) {
+        alog_sms(conn, msg, message);
+        return;
+    }
+
+    alog_sms_func(conn, msg, message);
+}
Index: gw/bearerbox.c
===================================================================
--- gw/bearerbox.c (revision 5335)
+++ gw/bearerbox.c (working copy)
@@ -377,7 +377,7 @@
CfgGroup *grp;
Octstr *log, *val;
long loglevel, store_dump_freq, value;
- int lf, m;
+ int lf, m, log_utf8;
#ifdef HAVE_LIBSSL
Octstr *ssl_server_cert_file;
Octstr *ssl_server_key_file;
@@ -393,6 +393,7 @@
/* defaults: use localtime and markers for access-log */
lf = m = 1;
+ log_utf8 = 0;
grp = cfg_get_single_group(cfg, octstr_imm("core"));
@@ -432,10 +433,15 @@
/* should predefined markers be used, ie. prefixing timestamp */
cfg_get_bool(&m, grp, octstr_imm("access-log-clean"));
+ /* access-log with wchar support */
+ cfg_get_bool(&log_utf8, grp, octstr_imm("access-log-utf8"));
+
/* custom access-log format */
if ((log = cfg_get(grp, octstr_imm("access-log-format"))) != NULL) {
- bb_alog_init(log);
+ bb_alog_init(log, log_utf8);
octstr_destroy(log);
+ } else {
+ bb_alog_init(log, log_utf8);
}
/* open access-log file */
Index: gw/bearerbox.h
===================================================================
--- gw/bearerbox.h (revision 5335)
+++ gw/bearerbox.h (working copy)
@@ -193,7 +193,7 @@
*/
/* passes the access-log-format string from config to the module */
-void bb_alog_init(const Octstr *format);
+void bb_alog_init(const Octstr *format, int wchar);
/* cleanup for internal things */
void bb_alog_shutdown(void);
Index: gwlib/cfg.def
===================================================================
--- gwlib/cfg.def (revision 5335)
+++ gwlib/cfg.def (working copy)
@@ -101,6 +101,7 @@
OCTSTR(access-log-time)
OCTSTR(access-log-format)
OCTSTR(access-log-clean)
+ OCTSTR(access-log-utf8)
OCTSTR(store-file)
OCTSTR(store-dump-freq)
OCTSTR(store-type)
Index: gwlib/octstr.c
===================================================================
--- gwlib/octstr.c (revision 5335)
+++ gwlib/octstr.c (working copy)
@@ -69,6 +69,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <locale.h>
+#include <wctype.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
@@ -217,6 +218,7 @@
void octstr_init(void)
{
+ setlocale(LC_ALL, "");
urlcode_init();
mutex_init_static(&immutables_mutex);
immutables_init = 1;
@@ -348,6 +350,16 @@
}
+/*
+ * Number of wide characters the content of `ostr' converts to in the
+ * current locale. Returns 0 for a NULL or empty octet string and
+ * (size_t) -1 if mbstowcs() rejects the content as an invalid
+ * multi-byte sequence.
+ */
+size_t octstr_len_wcstr(const Octstr *ostr)
+{
+    if (ostr == NULL)
+        return 0;
+    seems_valid(ostr);
+    /* same guard as octstr_get_wcstr_real(): do not touch data when empty */
+    if (ostr->len == 0)
+        return 0;
+
+    return mbstowcs(NULL, ostr->data, 0);
+}
+
+
Octstr *octstr_copy_real(const Octstr *ostr, long from, long len, const char
*file, long line,
const char *func)
{
@@ -448,6 +460,36 @@
}
+/*
+ * Convert the content of `ostr' to a newly allocated, NUL-terminated
+ * wide character string according to the current locale.
+ *
+ * Every non-NULL result is a separate allocation that the caller
+ * releases with gw_free(): a NULL `ostr' yields a copy of L"(null)",
+ * an empty one a copy of L"". NULL is returned if the content is not a
+ * valid multi-byte sequence.
+ *
+ * `file', `line' and `func' identify the call site; they are not used
+ * by this implementation.
+ */
+wchar_t *octstr_get_wcstr_real(const Octstr *ostr, const char *file, long line,
+                               const char *func)
+{
+    static const wchar_t null_text[] = L"(null)";
+    const size_t null_chars = sizeof(null_text) / sizeof(null_text[0]);
+    size_t mbslen;
+    wchar_t *wcs;
+
+    /*
+     * Callers modify the result in place and pass it to gw_free(), so
+     * the placeholder strings must be allocated copies, never literals.
+     */
+    if (!ostr) {
+        wcs = gw_calloc(null_chars, sizeof(*wcs));
+        if (wcs != NULL)
+            wmemcpy(wcs, null_text, null_chars);
+        return wcs;
+    }
+    seems_valid(ostr);
+    if (ostr->len == 0) {
+        wcs = gw_calloc(1, sizeof(*wcs));
+        if (wcs != NULL)
+            wcs[0] = L'\0';
+        return wcs;
+    }
+
+    mbslen = mbstowcs(NULL, ostr->data, 0);
+    if (mbslen == (size_t) -1) {
+        return NULL;
+    }
+
+    wcs = gw_calloc(mbslen + 1, sizeof(*wcs));
+    if (wcs == NULL)
+        return NULL;
+
+    if (mbstowcs(wcs, ostr->data, mbslen + 1) == (size_t) -1) {
+        gw_free(wcs);
+        return NULL;
+    }
+
+    return wcs;
+}
+
+
void octstr_append_from_hex(Octstr *ostr, char *hex)
{
Octstr *output;
@@ -867,7 +909,23 @@
}
+/*
+ * Replace, in place, every wide character that iswprint() rejects with
+ * L'.'. A NULL argument is ignored.
+ */
+void octstr_convert_wchar_printable(wchar_t *wcs)
+{
+    wchar_t *cur;
+
+    if (wcs == NULL)
+        return;
+
+    for (cur = wcs; *cur != L'\0'; cur++) {
+        if (iswprint(*cur))
+            continue;
+        *cur = L'.';
+    }
+}
+
+
int octstr_compare(const Octstr *ostr1, const Octstr *ostr2)
{
int ret;
Index: gwlib/octstr.h
===================================================================
--- gwlib/octstr.h (revision 5335)
+++ gwlib/octstr.h (working copy)
@@ -98,6 +98,7 @@
#include <stdio.h>
#include <stdarg.h>
+#include <wchar.h>
#include "list.h"
@@ -235,6 +236,27 @@
/*
+ * Return the content of the octet string converted to a NUL-terminated
+ * wide C character string, decoding multi-byte characters according to
+ * the current locale.
+ *
+ * If the octet string is empty, an empty wide C string is returned. If
+ * the conversion failed, NULL is returned.
+ */
+wchar_t *octstr_get_wcstr_real(const Octstr *ostr, const char *file, long line,
+ const char *func);
+#define octstr_get_wcstr(ostr) \
+ (octstr_get_wcstr_real(ostr, __FILE__, __LINE__, __func__))
+
+
+/*
+ * Return the length (number of wide characters) of the wide C string
+ * that the content of the octet string converts to.
+ */
+size_t octstr_len_wcstr(const Octstr *ostr);
+
+
+/*
* Append characters from printable hexadecimal format at the tail of
* an octet string. "78797a" or "78797A" would be converted to "xyz"
* and then appended.
@@ -313,6 +335,12 @@
/*
+ * Same as above, for wide char type.
+ */
+void octstr_convert_wchar_printable(wchar_t *wcs);
+
+
+/*
* Compare two octet strings, returning 0 if they are equal, negative if
* `ostr1' is less than `ostr2' (when compared octet-value by octet-value),
* and positive if greater.