Hi,

Quite some time has passed without any news on this bug report, so I'm
trying again with a new patch.
It uses parts of the previously submitted patches, but differs in some aspects:
 * I wrote it first, and then checked the BTS for patches - my bad ;-).
 * I re-added the file grouping according to license+copyright similarity,
   as IMVHO the removal of this feature defeats the whole purpose
   of automating this information retrieval.
 * It parses all licenses (known to licensecheck), irrespective of whether
   they are mentioned in DEP5, as otherwise the previous patch might have missed
   some licenses completely. Non-standardized (as seen from DEP5) license
   abbreviations are still prefixed by "other", for clarity.

It works for me with a real-world example, without breaking current
functionality. Of course, any amount of further polishing is possible,
but I think it is already worth adding in its current state, as it at
least helps a bit in the creation of a machine-readable
debian/copyright file. Even with this, there is of course enough work left
for the maintainer to do to get her/his debian/copyright file into shape.

Thanks for your consideration!

I am of course very open to any kind of comments!

Best Regards,
Jan

-- 
Jan Beyer                               happy Debian Maintainer ;-)     

mail    j...@beathovn.de                        GPG key ID 0x0CA6B4AA
jabber  beath...@jabber.org
web     http://www.beathovn.de/
--- /usr/bin/licensecheck	2010-01-03 05:16:46.000000000 +0100
+++ ./licensecheck	2010-01-12 22:12:11.000000000 +0100
@@ -28,7 +28,7 @@
 B<licensecheck> B<--help|--version>
 
 B<licensecheck> [B<--no-conf>] [B<--verbose>] [B<--copyright>]
-[B<-l|--lines=N>] [B<-i|--ignore=regex>] [B<-c|--check=regex>]
+[B<--dep5>] [B<-l|--lines=N>] [B<-i|--ignore=regex>] [B<-c|--check=regex>]
 [B<-r|--recursive>] I<list of files and directories to check>
 
 =head1 DESCRIPTION
@@ -78,6 +78,12 @@
 
 Also display copyright text found within the file
 
+=item B<--dep5>
+
+Use a DEP5-like machine-readable output format, where files with the same
+copyright and license are grouped together.
+For more information on DEP5 see http://dep.debian.net/deps/dep5/.
+
 =item B<--no-conf> B<--noconf>
 
 Do not read any configuration files. This can only be used as the first
@@ -107,6 +113,13 @@
 the license(s) in use.  This is equivalent to the --lines command line
 option.
 
+=item B<LICENSECHECK_DEP5>
+
+If this is set to I<yes>, then a DEP5-like machine-readable output format is
+used, where files with the same copyright and license are grouped together. The
+default is I<no>.
+For more information on DEP5 see http://dep.debian.net/deps/dep5/.
+
 =back
 
 =head1 LICENSE
@@ -130,6 +143,7 @@
 use File::Basename;
 
 sub fatal($);
+sub additem(@);
 sub parse_copyright($);
 sub parselicense($);
 
@@ -166,6 +180,7 @@
 my $opt_copyright = 0;
 my ($opt_help, $opt_version);
 my $def_lines = 60;
+my $opt_dep5 = 0;
 
 # Read configuration files and then command line
 # This is boilerplate
@@ -178,6 +193,7 @@
     my %config_vars = (
 		       'LICENSECHECK_VERBOSE' => 'no',
 		       'LICENSECHECK_PARSELINES' => $def_lines,
+		       'LICENSECHECK_DEP5' => 'no'
 		      );
     my %config_default = %config_vars;
 
@@ -198,6 +214,8 @@
 	or $config_vars{'LICENSECHECK_VERBOSE'} = 'no';
     $config_vars{'LICENSECHECK_PARSELINES'} =~ /^[1-9][0-9]*$/
 	or $config_vars{'LICENSECHECK_PARSELINES'} = $def_lines;
+    $config_vars{'LICENSECHECK_DEP5'} =~ /^(yes|no)$/
+	or $config_vars{'LICENSECHECK_DEP5'} = 'no';
 
     foreach my $var (sort keys %config_vars) {
 	if ($config_vars{$var} ne $config_default{$var}) {
@@ -209,6 +227,8 @@
 
     $opt_verbose = $config_vars{'LICENSECHECK_VERBOSE'} eq 'yes' ? 1 : 0;
     $opt_lines = $config_vars{'LICENSECHECK_PARSELINES'};
+    
+    $opt_dep5 = $config_vars{'LICENSECHECK_DEP5'} eq 'yes' ? 1 : 0;
 }
 
 GetOptions("help|h" => \$opt_help,
@@ -221,6 +241,7 @@
 	   "copyright" => \$opt_copyright,
 	   "noconf" => \$opt_noconf,
 	   "no-conf" => \$opt_noconf,
+	   "dep5" => \$opt_dep5,
 	   )
     or die "Usage: $progname [options] filelist\nRun $progname --help for more details\n";
 
@@ -266,6 +287,8 @@
     }
 }
 
+my @debcori = (); # array to keep all (filename, copyright, license) information
+
 while (@files) {
     my $file = shift @files;
     my $content = '';
@@ -298,12 +321,51 @@
     $content =~ tr/ //s;
 
     $license = parselicense($content);
-    print "$file: ";
-    print "*No copyright* " unless $copyright;
-    print $license . "\n";
-    print "  [Copyright: " . $copyright . "]\n"
-      if $copyright and $opt_copyright;
-    print "\n" if $opt_copyright;
+    
+    if ($opt_dep5) {
+      my $stripfile = $file;
+      $stripfile =~ s/^.\///;	# remove leading "./" for legibility reasons
+      additem($stripfile, $copyright, $license);
+    } else {
+      print "$file: ";
+      print "*No copyright* " unless $copyright;
+      print $license . "\n";
+      print "  [Copyright: " . $copyright . "]\n"
+        if $copyright and $opt_copyright;
+      print "\n" if $opt_copyright;
+    }
+}
+
+if ($opt_dep5) {
+# Print array debcori formatted
+  print "THIS DATA NEEDS HEAVY MANUAL EDITING/CHECKING BEFORE FORMING A VALID\n";
+  print "debian/copyright FILE. EVEN COMPLIANCE WITH DEP5 NEEDS TO BE CHECKED!\n";
+  print "For information on DEP5 see http://dep.debian.net/deps/dep5/\n";;
+  print "\n";
+  print "Format-Specification: <Fill in the correct URI>\n";
+  print "Name: <software name>\n";
+  print "Maintainer: <upstream maintainer name and address>\n";
+  print "Source: <URI>\n";
+  print "Disclaimer: <this line may be used for non-free and contrib packages>\n";
+  print "\n";
+  print "Files: *\n";
+  print "Copyright: <Fill in the general copyright of this software>\n";
+  print "License: <Fill in the general license of this software>\n";
+  print " <License text>\n";
+  print " Don't forget to remove the corresponding following section.\n";
+  print "\n";
+  print "Files: debian/*\n";
+  print "Copyright: <Fill in the copyright of the Debian packaging>\n";
+  print "License: <Fill in the license of the Debian packaging>\n";
+  print " <License text>\n";
+  print "\n";
+
+  for (my $i=0; $i<scalar(@debcori); $i++) {
+    print "Files: "."$debcori[$i]->[0]\n";
+    print "Copyright: "."$debcori[$i]->[1]\n";
+    print "License: "."$debcori[$i]->[2]\n\n";
+  }
+
 }
 
 sub parse_copyright($) {
@@ -359,6 +421,7 @@
                              (Default: '$default_check_regex')
    --recursive, -r        Add the contents of directories recursively
    --copyright            Also display the file's copyright
+   --dep5                 Use DEP5-like machine-readable output format
    --ignore, -i		  Specify that files / directories matching the
                             regular expression should be ignored when
                             checking files
@@ -388,15 +451,20 @@
     my $gplver = "";
     my $extrainfo = "";
     my $license = "";
+    my $mr_version = "";
+    my @mr_licenses = ();
 
     if ($licensetext =~ /version ([^ ]+) (?:\(?only\)?.? )?(?:of the GNU (Affero )?General Public License )?as published by the Free Software Foundation/i or
 	$licensetext =~ /GNU (?:Affero )?General Public License as published by the Free Software Foundation; version ([^ ]+) /i) {
 
 	$gplver = " (v$1)";
+	$mr_version = "$1";
     } elsif ($licensetext =~ /GNU (Affero ?)General Public License, version ([^ ]+?)[ .]/) {
 	$gplver = " (v$1)";
+	$mr_version = "$1+";
     } elsif ($licensetext =~ /either version ([^ ]+) of the License, or \(at your option\) any later version/) {
 	$gplver = " (v$1 or later)";
+	$mr_version = "$1+";
     }
 
     if ($licensetext =~ /(?:675 Mass Ave|59 Temple Place|51 Franklin Steet|02139|02111-1307)/i) {
@@ -409,130 +477,176 @@
 
     if ($licensetext =~ /(All changes made in this file will be lost|DO NOT (EDIT|delete this file)|Generated by)/i) {
 	$license = "GENERATED FILE";
+	push(@mr_licenses, "GENERATED FILE");
     }
 
     if ($licensetext =~ /is free software.? you can redistribute it and\/or modify it under the terms of the (GNU (Library|Lesser) General Public License|LGPL)/i) {
 	$license = "LGPL$gplver$extrainfo $license";
+	push(@mr_licenses, "LGPL-$mr_version$extrainfo");
     }
     
     if ($licensetext =~ /is free software.? you can redistribute it and\/or modify it under the terms of the (GNU Affero General Public License|AGPL)/i) {
 	$license = "AGPL$gplver$extrainfo $license";
+	push(@mr_licenses, "AGPL$gplver$extrainfo");
     }
 
     if ($licensetext =~ /is free software.? you (can|may) redistribute it and\/or modify it under the terms of (?:version [^ ]+ (?:\(?only\)? )?of )?the GNU General Public License/i) {
 	$license = "GPL$gplver$extrainfo $license";
+	push(@mr_licenses, "GPL-$mr_version$extrainfo");
     }
 
     if ($licensetext =~ /is distributed under the terms of the GNU General Public License,/
 	and length $gplver) {
 	$license = "GPL$gplver$extrainfo $license";
+	push(@mr_licenses, "GPL-$mr_version$extrainfo");
     }
 
     if ($licensetext =~ /is distributed.*terms.*GPL/) {
 	$license = "GPL (unversioned/unknown version) $license";
+	push(@mr_licenses, "GPL");
     }
 
     if ($licensetext =~ /This file is part of the .*Qt GUI Toolkit. This file may be distributed under the terms of the Q Public License as defined/) {
 	$license = "QPL (part of Qt) $license";
+	push(@mr_licenses, "QPL");
+
     } elsif ($licensetext =~ /may be distributed under the terms of the Q Public License as defined/) {
 	$license = "QPL $license";
+	push(@mr_licenses, "QPL");
     }
 
     if ($licensetext =~ /http:\/\/opensource\.org\/licenses\/mit-license\.php/) {
 	$license = "MIT/X11 (BSD like) $license";
+    push(@mr_licenses, "other (MIT) (check exact wording)");
     } elsif ($licensetext =~ /Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files \(the Software\), to deal in the Software/) {
 	$license = "MIT/X11 (BSD like) $license";
+    push(@mr_licenses, "other (MIT) (check exact wording)");
     }
 
     if ($licensetext  =~ /Permission to use, copy, modify, and(\/or)? distribute this software for any purpose with or without fee is hereby granted, provided.*copyright notice.*permission notice.*all copies/) {
 	$license = "ISC $license";
+	push(@mr_licenses, "other (ISC)");
     }
 
     if ($licensetext =~ /THIS SOFTWARE IS PROVIDED .*AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY/) {
 	if ($licensetext =~ /All advertising materials mentioning features or use of this software must display the following acknowledge?ment.*This product includes software developed by/i) {
 	    $license = "BSD (4 clause) $license";
+		push(@mr_licenses, "BSD (4 clause)");
 	} elsif ($licensetext =~ /(The name of .*? may not|Neither the names? of .*? nor the names of (its|their) contributors may) be used to endorse or promote products derived from this software/i) {
 	    $license = "BSD (3 clause) $license";
+   		push(@mr_licenses, "BSD (3 clause)");
 	} elsif ($licensetext =~ /Redistributions of source code must retain the above copyright notice/i) {
 	    $license = "BSD (2 clause) $license";
+   		push(@mr_licenses, "BSD (2 clause)");
 	} else {
 	    $license = "BSD $license";
+   		push(@mr_licenses, "BSD");
 	}
     }
 
     if ($licensetext =~ /Mozilla Public License Version ([^ ]+)/) {
 	$license = "MPL (v$1) $license";
+	push(@mr_licenses, "MPL-$1");
     }
 
     if ($licensetext =~ /Released under the terms of the Artistic License ([^ ]+)/) {
 	$license = "Artistic (v$1) $license";
+	push(@mr_licenses, "Artistic-$1");
     }
 
     if ($licensetext =~ /is free software under the Artistic [Ll]icense/) {
 	$license = "Artistic $license";
+	push(@mr_licenses, "Artistic");
     }
 
     if ($licensetext =~ /This program is free software; you can redistribute it and\/or modify it under the same terms as Perl itself/) {
 	$license = "Perl $license";
+	push(@mr_licenses, "Perl");
     }
 
     if ($licensetext =~ /under the Apache License, Version ([^ ]+) \(the License\)/) {
 	$license = "Apache (v$1) $license";
+    push(@mr_licenses, "Apache-$1");
     }
 
     if ($licensetext =~ /This source file is subject to version ([^ ]+) of the PHP license/) {
 	$license = "PHP (v$1) $license";
+	push(@mr_licenses, "other (PHP-$1) (problematic if not applying to PHP itself)");
     }
 
     if ($licensetext =~ /under the terms of the CeCILL /) {
 	$license = "CeCILL $license";
+	push(@mr_licenses, "other (CeCILL)");
     }
 
     if ($licensetext =~ /under the terms of the CeCILL-([^ ]+) /) {
 	$license = "CeCILL-$1 $license";
+	push(@mr_licenses, "other (CeCILL-$1)");
     }
 
     if ($licensetext =~ /under the SGI Free Software License B/) {
 	$license = "SGI Free Software License B $license";
+	push(@mr_licenses, "other (SGI Free Software License B)");
     }
 
     if ($licensetext =~ /is in the public domain/i) {
 	$license = "Public domain";
+	push(@mr_licenses, "other (PD (should be stated in the copyright field))");
     }
 
     if ($licensetext =~ /terms of the Common Development and Distribution License(, Version ([^(]+))? \(the License\)/) {
 	$license = "CDDL " . ($1 ? "(v$2) " : '') . $license;
+	push(@mr_licenses, "CDDL".($1 ? "-$2" : ''));
     }
 
     if ($licensetext =~ /Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license \(the \"Software\"\)/ or
 	$licensetext =~ /Boost Software License([ ,-]+Version ([^ ]+)?(\.))/i) {
 	$license = "BSL " . ($1 ? "(v$2) " : '') . $license;
+	push(@mr_licenses, "other (BSL".($1 ? "-$2" : '').")");
     }
 
     if ($licensetext =~ /PYTHON SOFTWARE FOUNDATION LICENSE (VERSION ([^ ]+))/i) {
 	$license = "PSF " . ($1 ? "(v$2) " : '') . $license;
+    push(@mr_licenses, "PSF" . ($1 ? "-$2" : ''));
     }
 
     if ($licensetext =~ /The origin of this software must not be misrepresented.*Altered source versions must be plainly marked as such.*This notice may not be removed or altered from any source distribution/ or
         $licensetext =~ /see copyright notice in zlib\.h/) {
 	$license = "zlib/libpng $license";
+	push(@mr_licenses, "ZLIB");
     }
 
-    if ($licensetext =~ /Do What The Fuck You Want To Public License, Version ([^, ]+)/i) {
-        $license = "WTFPL (v$1)";
-    }
+    $license = "UNKNOWN" if (!length($license));
 
-    if ($licensetext =~ /Do what The Fuck You Want To Public License/i) {
-        $license = "WTFPL";
+    if ($opt_dep5) {
+        return join(" | ", @mr_licenses) if @mr_licenses;
     }
 
-    if ($licensetext =~ /(License WTFPL|Under (the|a) WTFPL)/i) {
-        $license = "WTFPL";
-    }
+    return $license;
+}
 
-    $license = "UNKNOWN" if (!length($license));
+sub additem(@) {
+# Adds a reference to a subarray (filenames, copyright, license) to the array
+# debcori if the current copyright+license combination has not yet been seen.
+# Otherwise just adds the filename to the appropriate filenames-element.
+
+  my $l = scalar(@debcori);
+  my $samecrlicexists = 0;
+
+  for (my $i=0; $i<$l; $i++) {
+    
+    if (($debcori[$i]->[1] eq $_[1]) and ($debcori[$i]->[2] eq $_[2])) {
+      # identical copyright line exists already
+      $debcori[$i]->[0] = join(", ",($debcori[$i]->[0], $_[0]));
+      $samecrlicexists=1;
+    }
+  }
+
+  if ($samecrlicexists == 0) {
+    # no identical copyright line exists, thus appending new array as reference
+    push(@debcori, \@_);
+  }
 
-    return $license;
 }
 
 sub fatal($) {

Reply via email to