Package: file
Version: 4.17-4
Followup-For: Bug #38542

I am having problems with these rules too, and there seems to be no
way to turn them off. I can see how they might be useful e.g. in code
analysis, but for other uses (e.g. determining MIME type for serving
files from a web application) they are a positive nuisance: all sorts
of text-ish files end up being identified as program source. Of
course, I want REAL program sources (ending in the correct suffixes,
normally) to be identified as such, but equally non-suffixed files
that aren't plain text (e.g. scripts) should also be correctly
identified, so I can't rely on suffixes alone.

I attach a (tested!) patch (code and docs!) to add a new option
--no-ascmagic/-A.
-- System Information:
Debian Release: testing/unstable
  APT prefers testing
  APT policy: (500, 'testing')
Architecture: i386 (i686)
Shell:  /bin/sh linked to /bin/bash
Kernel: Linux 2.6.16
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8)

Versions of packages file depends on:
ii  libc6                        2.3.6.ds1-4 GNU C Library: Shared libraries
ii  libmagic1                    4.17-4      File type determination library us

file recommends no packages.

-- no debconf information
diff -Nur file-4.17/doc/file.man file-4.17-opt_ascmagic/doc/file.man
--- file-4.17/doc/file.man      2006-10-23 19:39:10.000000000 +0100
+++ file-4.17-opt_ascmagic/doc/file.man 2006-10-23 20:18:03.000000000 +0100
@@ -6,7 +6,7 @@
 .SH SYNOPSIS
 .B file
 [
-.B \-bchikLnNprsvz
+.B \-bchiAkLnNprsvz
 ]
 [
 .B \-f
@@ -194,6 +194,11 @@
 .I POSIXLY_CORRECT
 is not defined.
 .TP 8
+.B "\-A, \-\-no-ascmagic"
+Causes the file command to skip the built-in rules that guess certain
+programming language source types from file content. These rules can
+cause problems by giving false positives on plain text files.
+.TP 8
 .B "\-i, \-\-mime"
 Causes the file command to output mime type strings rather than the more
 traditional human readable ones. Thus it may say
diff -Nur file-4.17/src/file.c file-4.17-opt_ascmagic/src/file.c
--- file-4.17/src/file.c        2006-10-23 19:39:10.000000000 +0100
+++ file-4.17-opt_ascmagic/src/file.c   2006-10-23 20:22:10.000000000 +0100
@@ -126,7 +126,7 @@
        int flags = 0;
        char *home, *usermagic;
        struct stat sb;
-#define OPTSTRING      "bcCdf:F:hikLm:nNprsvz"
+#define OPTSTRING      "bcCdf:F:hiAkLm:nNprsvz"
 #ifdef HAVE_GETOPT_LONG
        int longindex;
        private struct option long_options[] =
@@ -139,6 +139,7 @@
                {"files-from", 1, 0, 'f'},
                {"separator", 1, 0, 'F'},
                {"mime", 0, 0, 'i'},
+                {"no-ascmagic", 0, 0, 'A'},
                {"keep-going", 0, 0, 'k'},
 #ifdef S_IFLNK
                {"dereference", 0, 0, 'L'},
@@ -229,6 +230,9 @@
                case 'i':
                        flags |= MAGIC_MIME;
                        break;
+                case 'A':
+                       flags |= MAGIC_NOASCMAGIC;
+                        break;
                case 'k':
                        flags |= MAGIC_CONTINUE;
                        break;
diff -Nur file-4.17/src/funcs.c file-4.17-opt_ascmagic/src/funcs.c
--- file-4.17/src/funcs.c       2006-03-02 22:10:26.000000000 +0000
+++ file-4.17-opt_ascmagic/src/funcs.c  2006-10-23 20:10:03.000000000 +0100
@@ -128,7 +128,7 @@
            /* try tests in /etc/magic (or surrogate magic file) */
            if ((m = file_softmagic(ms, buf, nb)) == 0) {
                /* try known keywords, check whether it is ASCII */
-               if ((m = file_ascmagic(ms, buf, nb)) == 0) {
+               if (ms->flags & MAGIC_NOASCMAGIC || (m = file_ascmagic(ms, buf, nb)) == 0) {
                    /* abandon hope, all ye who remain here */
                    if (file_printf(ms, ms->flags & MAGIC_MIME ?
                        (nb ? "application/octet-stream" :
diff -Nur file-4.17/src/magic.h file-4.17-opt_ascmagic/src/magic.h
--- file-4.17/src/magic.h       2004-09-25 16:20:29.000000000 +0100
+++ file-4.17-opt_ascmagic/src/magic.h  2006-10-23 20:10:59.000000000 +0100
@@ -40,6 +40,7 @@
 #define        MAGIC_PRESERVE_ATIME    0x080   /* Restore access time on exit */
 #define        MAGIC_RAW               0x100   /* Don't translate unprintable chars */
 #define        MAGIC_ERROR             0x200   /* Handle ENOENT etc as real errors */
+#define        MAGIC_NOASCMAGIC        0x400   /* Don't use ascmagic rules */
 
 #ifdef __cplusplus
 extern "C" {

Reply via email to