#!/usr/bin/perl

use warnings;
use strict;
#use re 'debug';

my $magic = shift;

open my $fh, "<$magic" or die;
my @lines = <$fh>;
close $fh;

my @apache_types = ();
open my $apache, "<mime.types" or print STDERR "Not using Apache MIME database.\n";
if (defined($apache)) {
	while (<$apache>) {
		$_ =~ /^([^#\s]+)/;
		if (defined($1)) {
			my $type = $1;
			chomp $type;
			push @apache_types, $type;
		}
	}
}
close $apache;

my @registered_types = `./get_iana_types.pl`;
my %content_types = ();
foreach my $type (@registered_types)
{
	chomp $type;
	my ($type, $subtype) = split /\//, $type;
	push @{$content_types{$type}}, $subtype;
}

my @seen = ();

foreach my $line (@lines)
{
	if ($line =~ /^#/) { next; }
	if ($line =~ /^\s*$/) { next; }
	# MIME type is in the 4th tabs-separated field
	# Ugh, some lines have fields separated by spaces.  Worst,
	# some are separated by only one space, while a space can occur
	# in the magic signature, as long as it is preceded by a
	# backslash!

	my $mime = '';
#	if ($line =~ /^[^\t]+(?:\t+|[\t\s]{2,})[^\t]+(?:\t+|[\t\s]{2,})[^\t]+(?:\t+|[\t\s]{2,})([^\t]+?)(?:\t.*)?$/) {
	if ($line =~ /^(?:[^\s]|(?<=\\)\s)+\s+(?:[^\s]|(?<=\\)\s)+\s+(?:[^\s]|(?<=\\)\s)+\s+((?:[^\s]|(?<=\\)\s)+)(?:\s+.*)?$/) {
		$mime = $1;
		#print $line;
		#print $mime."\n";
	}
	else {
		#print STDERR "Non-matched line: $line";
		next;
	}

	$mime =~ /^(.*)\/(.*)$/;
	my $type = $1;
	my $subtype = $2;
	my $match = 0;
	my @approx = ();
	my @apache_matches = ();

	foreach my $seen (@seen) {
		if ($mime eq $seen) { $match = 1; last; }
	}
	if ($match == 1) { next; }

	push @seen, $mime;
	$match = 0;

	# If not x- subtype, make sure it is registered
	if ($subtype !~ /^x[-\.]/i) {
		foreach my $reg_subtype (@{$content_types{$type}})
		{
			if (lc($subtype) eq lc($reg_subtype)) { $match = 1; }
			elsif ($reg_subtype =~ /$subtype/)
				{ push @approx, $reg_subtype; }
		}
		if (!$match) {
			print "$mime is not a registered type; use $type/x-$subtype instead!\n";
			if (scalar(@approx) > 0) {
				print "Maybe you meant one of the following: ";
				foreach my $approx (@approx) {
					print "$type/$approx ";
				}
				print "\n";
			}
			my @defacto = ();
			my $apache_match = 0;
			foreach my $apache_type (@apache_types) {
				my ($apache_maintype, $apache_subtype) = split /\//, $apache_type;
				if ($apache_maintype ne $type) { next; }

				if ($apache_subtype eq $subtype) { $apache_match = 1; last; }
				if ($apache_subtype =~ /^x[-\.]/ &&
					$apache_subtype =~ /$subtype/) {
					push @defacto, $apache_subtype;
				}
			}
			if (!$apache_match) {
				foreach my $defacto (@defacto) {
					print "Apache de facto standards suggests $type/$defacto instead of $mime\n";
				}
			}
		}
	}
	else {
		# make sure there is no registered type
		$subtype =~ s/^(x[-\.])//i;
		my $prefix = $1;
		foreach my $reg_subtype (@{$content_types{$type}})
		{
			if (lc($subtype) eq lc($reg_subtype)) { $match = 1; }
			elsif ($reg_subtype =~ /$subtype/)
				{ push @approx, $reg_subtype; }
		}
		if ($match) {
			print "$mime is prefixed with $prefix, but looks like a registered type; use $type/$subtype instead.\n";
		}
		else {
			if (scalar(@approx) > 0) {
				print "Did you mean one of the following for $mime: ";
				foreach my $approx (@approx) {
					print "$type/$approx ";
				}
				print "\n";
			}
			my @defacto = ();
			my $apache_match = 0;
			foreach my $apache_type (@apache_types) {
				my ($apache_maintype, $apache_subtype) = split /\//, $apache_type;
				if ($apache_maintype ne $type) { next; }

				my $real_subtype = $apache_subtype;
				if ($apache_subtype =~ /^x[-\.](.*)/) {
					$real_subtype = $1;
				}

				if ($real_subtype eq $subtype) { $apache_match = 1; last; }
				if ($apache_subtype =~ /^x[-\.]/ &&
					$apache_subtype =~ /$subtype/) {
					push @defacto, $apache_subtype;
				}
			}
			if (!$apache_match) {
				foreach my $defacto (@defacto) {
					print "Apache de facto standards suggests $type/$defacto instead of $mime\n";
				}
			}
		}
	}

}
