On Monday 26 September 2011 10:07:45 Alexander Larsson wrote: > The text check should be done in xdg_mime_get_mime_type_for_data, not > just _xdg_mime_cache_get_mime_type_for_data, in case some directory > doesn't have a cache file and we fall back on using the "magic" file.
Indeed, I missed a code path. But if the text check is only done in xdg_mime_get_mime_type_for_data, then it's missing the case where _xdg_mime_cache_get_mime_type_for_file calls cache_get_mime_type_for_data directly. This makes it hard to have the check in a single place; I don't see a way around having the code in two places (or at least a function call in both places). ... okay after more hacking and testing, the new patch is attached. OK for commit (to git, this time)? I'll delete the sources from CVS to prevent further confusion, too. > Also, our current code in glib doesn't limit itself to the first 32 > chars, but looks at all the data we have read anyway. Is there a > particular reason to stop early? Performance? Yes, obviously we don't want to read a 4 GB file; the theory (which is actually written explicitly in the shared-mime-info spec) is that the chances of a binary file starting with 32 readable ASCII chars are pretty low. I can see your point in "we have read more than 32 bytes anyway", but then the results become implementation-dependent: it would depend on how much data was buffered by a given implementation. So I suggest we use 32 everywhere. -- David Faure, [email protected], http://www.davidfaure.fr Sponsored by Nokia to work on KDE, incl. Konqueror (http://www.konqueror.org).
>From 5181175d5fdaa3832b0fd094cda0120b1fe92af6 Mon Sep 17 00:00:00 2001 From: David Faure <[email protected]> Date: Thu, 29 Sep 2011 17:26:33 +0200 Subject: [PATCH] Implement text vs binary fallback; support for application/x-zerosize --- src/xdgmime.c | 18 +++++++++++++----- src/xdgmime.h | 6 ++++++ src/xdgmimecache.c | 9 +++++++-- src/xdgmimeint.c | 15 +++++++++++++++ src/xdgmimeint.h | 1 + 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/xdgmime.c b/src/xdgmime.c index c3b21cb..c7b16bb 100644 --- a/src/xdgmime.c +++ b/src/xdgmime.c @@ -64,6 +64,8 @@ XdgMimeCache **_caches = NULL; static int n_caches = 0; const char xdg_mime_type_unknown[] = "application/octet-stream"; +const char xdg_mime_type_empty[] = "application/x-zerosize"; +const char xdg_mime_type_textplain[] = "text/plain"; enum @@ -467,17 +469,23 @@ xdg_mime_get_mime_type_for_data (const void *data, { const char *mime_type; + if (len == 0) + { + *result_prio = 100; + return XDG_MIME_TYPE_EMPTY; + } + xdg_mime_init (); if (_caches) - return _xdg_mime_cache_get_mime_type_for_data (data, len, result_prio); - - mime_type = _xdg_mime_magic_lookup_data (global_magic, data, len, result_prio, NULL, 0); + mime_type = _xdg_mime_cache_get_mime_type_for_data (data, len, result_prio); + else + mime_type = _xdg_mime_magic_lookup_data (global_magic, data, len, result_prio, NULL, 0); if (mime_type) return mime_type; - return XDG_MIME_TYPE_UNKNOWN; + return _xdg_binary_or_text_fallback(data, len); } const char * @@ -556,7 +564,7 @@ xdg_mime_get_mime_type_for_file (const char *file_name, if (mime_type) return mime_type; - return XDG_MIME_TYPE_UNKNOWN; + return _xdg_binary_or_text_fallback(data, bytes_read); } const char * diff --git a/src/xdgmime.h b/src/xdgmime.h index d3031a3..6a34edf 100644 --- a/src/xdgmime.h +++ b/src/xdgmime.h @@ -68,6 +68,8 @@ typedef void (*XdgMimeDestroy) (void *user_data); #define xdg_mime_register_reload_callback XDG_ENTRY(register_reload_callback) #define 
xdg_mime_remove_callback XDG_ENTRY(remove_callback) #define xdg_mime_type_unknown XDG_ENTRY(type_unknown) +#define xdg_mime_type_empty XDG_ENTRY(type_empty) +#define xdg_mime_type_textplain XDG_ENTRY(type_textplain) #define xdg_mime_get_icon XDG_ENTRY(get_icon) #define xdg_mime_get_generic_icon XDG_ENTRY(get_generic_icon) @@ -77,7 +79,11 @@ typedef void (*XdgMimeDestroy) (void *user_data); #endif extern const char xdg_mime_type_unknown[]; +extern const char xdg_mime_type_empty[]; +extern const char xdg_mime_type_textplain[]; #define XDG_MIME_TYPE_UNKNOWN xdg_mime_type_unknown +#define XDG_MIME_TYPE_EMPTY xdg_mime_type_empty +#define XDG_MIME_TYPE_TEXTPLAIN xdg_mime_type_textplain const char *xdg_mime_get_mime_type_for_data (const void *data, size_t len, diff --git a/src/xdgmimecache.c b/src/xdgmimecache.c index 1e99b3e..41b13e9 100644 --- a/src/xdgmimecache.c +++ b/src/xdgmimecache.c @@ -693,12 +693,11 @@ cache_get_mime_type_for_data (const void *data, for (n = 0; n < n_mime_types; n++) { - if (mime_types[n]) return mime_types[n]; } - return XDG_MIME_TYPE_UNKNOWN; + return NULL; } const char * @@ -743,6 +742,9 @@ _xdg_mime_cache_get_mime_type_for_file (const char *file_name, statbuf = &buf; } + if (statbuf->st_size == 0) + return XDG_MIME_TYPE_EMPTY; + if (!S_ISREG (statbuf->st_mode)) return XDG_MIME_TYPE_UNKNOWN; @@ -772,6 +774,9 @@ _xdg_mime_cache_get_mime_type_for_file (const char *file_name, mime_type = cache_get_mime_type_for_data (data, bytes_read, NULL, mime_types, n); + if (!mime_type) + mime_type = _xdg_binary_or_text_fallback(data, bytes_read); + free (data); fclose (file); diff --git a/src/xdgmimeint.c b/src/xdgmimeint.c index d372d2c..cf789d9 100644 --- a/src/xdgmimeint.c +++ b/src/xdgmimeint.c @@ -189,3 +189,18 @@ _xdg_reverse_ucs4 (xdg_unichar_t *source, int len) } } +const char * +_xdg_binary_or_text_fallback(const void *data, size_t len) +{ + unsigned char *chardata; + int i; + + chardata = (unsigned char *) data; + for (i = 0; i < 32 && i < len; 
++i) + { + if (chardata[i] < 32 && chardata[i] != 9 && chardata[i] != 10 && chardata[i] != 13) + return XDG_MIME_TYPE_UNKNOWN; /* binary data */ + } + + return XDG_MIME_TYPE_TEXTPLAIN; +} diff --git a/src/xdgmimeint.h b/src/xdgmimeint.h index 232c808..9e8b2cb 100644 --- a/src/xdgmimeint.h +++ b/src/xdgmimeint.h @@ -73,5 +73,6 @@ int _xdg_utf8_validate (const char *source); xdg_unichar_t *_xdg_convert_to_ucs4 (const char *source, int *len); void _xdg_reverse_ucs4 (xdg_unichar_t *source, int len); const char *_xdg_get_base_name (const char *file_name); +const char *_xdg_binary_or_text_fallback(const void *data, size_t len); #endif /* __XDG_MIME_INT_H__ */ -- 1.7.3.4
_______________________________________________ xdg mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/xdg
