Hi, quite some time has gone by without any news in this bug report, so I'm trying again with a new patch. It uses parts of the previously submitted patches, but differs in some respects: * I wrote it first, and only then checked the BTS for patches - my bad ;-). * I re-added the file grouping according to license+copyright similarity, as IMVHO removing this feature defeats the whole purpose of automating this information retrieval. * It parses all licenses (known to licensecheck), irrespective of whether they are mentioned in DEP5, as otherwise the previous patch might have missed some licenses completely. Non-standardized (as seen from DEP5) license abbreviations are still prefixed by "other", for clarity.
It works for me with a real-world example, without breaking current functionality. Of course, an arbitrary amount of further polishing is possible, but I think it is already worth adding in its current state, as it at least helps a bit in the creation of a machine-readable debian/copyright file. Even with this, there is of course enough work left for the maintainer to do to get her/his debian/copyright file into shape. Thanks for your consideration! I am of course very open to any kind of comments! Best Regards, Jan -- Jan Beyer happy Debian Maintainer ;-) mail j...@beathovn.de GPG key ID 0x0CA6B4AA jabber beath...@jabber.org web http://www.beathovn.de/
--- /usr/bin/licensecheck 2010-01-03 05:16:46.000000000 +0100 +++ ./licensecheck 2010-01-12 22:12:11.000000000 +0100 @@ -28,7 +28,7 @@ B<licensecheck> B<--help|--version> B<licensecheck> [B<--no-conf>] [B<--verbose>] [B<--copyright>] -[B<-l|--lines=N>] [B<-i|--ignore=regex>] [B<-c|--check=regex>] +[B<--dep5>] [B<-l|--lines=N>] [B<-i|--ignore=regex>] [B<-c|--check=regex>] [B<-r|--recursive>] I<list of files and directories to check> =head1 DESCRIPTION @@ -78,6 +78,12 @@ Also display copyright text found within the file +=item B<--dep5> + +Use a DEP5-like machine-readable output format, where files with the same +copyright and license are grouped together. +For more information on DEP5 see http://dep.debian.net/deps/dep5/. + =item B<--no-conf> B<--noconf> Do not read any configuration files. This can only be used as the first @@ -107,6 +113,13 @@ the license(s) in use. This is equivalent to the --lines command line option. +=item B<LICENSECHECK_DEP5> + +If this is set to I<yes>, then a DEP5-like machine-readable output format is +used, where files with the same copyright and license are grouped together. The +default is I<no>. +For more information on DEP5 see http://dep.debian.net/deps/dep5/. 
+ =back =head1 LICENSE @@ -130,6 +143,7 @@ use File::Basename; sub fatal($); +sub additem(@); sub parse_copyright($); sub parselicense($); @@ -166,6 +180,7 @@ my $opt_copyright = 0; my ($opt_help, $opt_version); my $def_lines = 60; +my $opt_dep5 = 0; # Read configuration files and then command line # This is boilerplate @@ -178,6 +193,7 @@ my %config_vars = ( 'LICENSECHECK_VERBOSE' => 'no', 'LICENSECHECK_PARSELINES' => $def_lines, + 'LICENSECHECK_DEP5' => 'no' ); my %config_default = %config_vars; @@ -198,6 +214,8 @@ or $config_vars{'LICENSECHECK_VERBOSE'} = 'no'; $config_vars{'LICENSECHECK_PARSELINES'} =~ /^[1-9][0-9]*$/ or $config_vars{'LICENSECHECK_PARSELINES'} = $def_lines; + $config_vars{'LICENSECHECK_DEP5'} =~ /^(yes|no)$/ + or $config_vars{'LICENSECHECK_DEP5'} = 'no'; foreach my $var (sort keys %config_vars) { if ($config_vars{$var} ne $config_default{$var}) { @@ -209,6 +227,8 @@ $opt_verbose = $config_vars{'LICENSECHECK_VERBOSE'} eq 'yes' ? 1 : 0; $opt_lines = $config_vars{'LICENSECHECK_PARSELINES'}; + + $opt_dep5 = $config_vars{'LICENSECHECK_DEP5'} eq 'yes' ? 1 : 0; } GetOptions("help|h" => \$opt_help, @@ -221,6 +241,7 @@ "copyright" => \$opt_copyright, "noconf" => \$opt_noconf, "no-conf" => \$opt_noconf, + "dep5" => \$opt_dep5, ) or die "Usage: $progname [options] filelist\nRun $progname --help for more details\n"; @@ -266,6 +287,8 @@ } } +my @debcori = (); # array to keep all (filename, copyright, license) information + while (@files) { my $file = shift @files; my $content = ''; @@ -298,12 +321,51 @@ $content =~ tr/ //s; $license = parselicense($content); - print "$file: "; - print "*No copyright* " unless $copyright; - print $license . "\n"; - print " [Copyright: " . $copyright . 
"]\n" - if $copyright and $opt_copyright; - print "\n" if $opt_copyright; + + if ($opt_dep5) { + my $stripfile = $file; + $stripfile =~ s/^.\///; # remove leading "./" for legibility reasons + additem($stripfile, $copyright, $license); + } else { + print "$file: "; + print "*No copyright* " unless $copyright; + print $license . "\n"; + print " [Copyright: " . $copyright . "]\n" + if $copyright and $opt_copyright; + print "\n" if $opt_copyright; + } +} + +if ($opt_dep5) { +# Print array debcori formatted + print "THIS DATA NEEDS HEAVY MANUAL EDITING/CHECKING BEFORE FORMING A VALID\n"; + print "debian/copyright FILE. EVEN COMPLIANCE WITH DEP5 NEEDS TO BE CHECKED!\n"; + print "For information on DEP5 see http://dep.debian.net/deps/dep5/\n"; + print "\n"; + print "Format-Specification: <Fill in the correct URI>\n"; + print "Name: <software name>\n"; + print "Maintainer: <upstream maintainer name and address>\n"; + print "Source: <URI>\n"; + print "Disclaimer: <this line may be used for non-free and contrib packages>\n"; + print "\n"; + print "Files: *\n"; + print "Copyright: <Fill in the general copyright of this software>\n"; + print "License: <Fill in the general license of this software>\n"; + print " <License text>\n"; + print " Don't forget to remove the corresponding following section.\n"; + print "\n"; + print "Files: debian/*\n"; + print "Copyright: <Fill in the copyright of the Debian packaging>\n"; + print "License: <Fill in the license of the Debian packaging>\n"; + print " <License text>\n"; + print "\n"; + + for (my $i=0; $i<scalar(@debcori); $i++) { + print "Files: "."$debcori[$i]->[0]\n"; + print "Copyright: "."$debcori[$i]->[1]\n"; + print "License: "."$debcori[$i]->[2]\n\n"; + } + } sub parse_copyright($) { @@ -359,6 +421,7 @@ (Default: '$default_check_regex') --recursive, -r Add the contents of directories recursively --copyright Also display the file's copyright + --dep5 Use DEP5-like machine-readable output format --ignore, -i Specify that files / 
directories matching the regular expression should be ignored when checking files @@ -388,15 +451,20 @@ my $gplver = ""; my $extrainfo = ""; my $license = ""; + my $mr_version = ""; + my @mr_licenses = (); if ($licensetext =~ /version ([^ ]+) (?:\(?only\)?.? )?(?:of the GNU (Affero )?General Public License )?as published by the Free Software Foundation/i or $licensetext =~ /GNU (?:Affero )?General Public License as published by the Free Software Foundation; version ([^ ]+) /i) { $gplver = " (v$1)"; + $mr_version = "$1"; } elsif ($licensetext =~ /GNU (Affero ?)General Public License, version ([^ ]+?)[ .]/) { $gplver = " (v$1)"; + $mr_version = "$1+"; } elsif ($licensetext =~ /either version ([^ ]+) of the License, or \(at your option\) any later version/) { $gplver = " (v$1 or later)"; + $mr_version = "$1+"; } if ($licensetext =~ /(?:675 Mass Ave|59 Temple Place|51 Franklin Steet|02139|02111-1307)/i) { @@ -409,130 +477,176 @@ if ($licensetext =~ /(All changes made in this file will be lost|DO NOT (EDIT|delete this file)|Generated by)/i) { $license = "GENERATED FILE"; + push(@mr_licenses, "GENERATED FILE"); } if ($licensetext =~ /is free software.? you can redistribute it and\/or modify it under the terms of the (GNU (Library|Lesser) General Public License|LGPL)/i) { $license = "LGPL$gplver$extrainfo $license"; + push(@mr_licenses, "LGPL-$mr_version$extrainfo"); } if ($licensetext =~ /is free software.? you can redistribute it and\/or modify it under the terms of the (GNU Affero General Public License|AGPL)/i) { $license = "AGPL$gplver$extrainfo $license"; + push(@mr_licenses, "AGPL$gplver$extrainfo"); } if ($licensetext =~ /is free software.? you (can|may) redistribute it and\/or modify it under the terms of (?:version [^ ]+ (?:\(?only\)? 
)?of )?the GNU General Public License/i) { $license = "GPL$gplver$extrainfo $license"; + push(@mr_licenses, "GPL-$mr_version$extrainfo"); } if ($licensetext =~ /is distributed under the terms of the GNU General Public License,/ and length $gplver) { $license = "GPL$gplver$extrainfo $license"; + push(@mr_licenses, "GPL-$mr_version$extrainfo"); } if ($licensetext =~ /is distributed.*terms.*GPL/) { $license = "GPL (unversioned/unknown version) $license"; + push(@mr_licenses, "GPL"); } if ($licensetext =~ /This file is part of the .*Qt GUI Toolkit. This file may be distributed under the terms of the Q Public License as defined/) { $license = "QPL (part of Qt) $license"; + push(@mr_licenses, "QPL"); + } elsif ($licensetext =~ /may be distributed under the terms of the Q Public License as defined/) { $license = "QPL $license"; + push(@mr_licenses, "QPL"); } if ($licensetext =~ /http:\/\/opensource\.org\/licenses\/mit-license\.php/) { $license = "MIT/X11 (BSD like) $license"; + push(@mr_licenses, "other (MIT) (check exact wording)"); } elsif ($licensetext =~ /Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files \(the Software\), to deal in the Software/) { $license = "MIT/X11 (BSD like) $license"; + push(@mr_licenses, "other (MIT) (check exact wording)"); } if ($licensetext =~ /Permission to use, copy, modify, and(\/or)? 
distribute this software for any purpose with or without fee is hereby granted, provided.*copyright notice.*permission notice.*all copies/) { $license = "ISC $license"; + push(@mr_licenses, "other (ISC)"); } if ($licensetext =~ /THIS SOFTWARE IS PROVIDED .*AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY/) { if ($licensetext =~ /All advertising materials mentioning features or use of this software must display the following acknowledge?ment.*This product includes software developed by/i) { $license = "BSD (4 clause) $license"; + push(@mr_licenses, "BSD (4 clause)"); } elsif ($licensetext =~ /(The name of .*? may not|Neither the names? of .*? nor the names of (its|their) contributors may) be used to endorse or promote products derived from this software/i) { $license = "BSD (3 clause) $license"; + push(@mr_licenses, "BSD (3 clause)"); } elsif ($licensetext =~ /Redistributions of source code must retain the above copyright notice/i) { $license = "BSD (2 clause) $license"; + push(@mr_licenses, "BSD (2 clause)"); } else { $license = "BSD $license"; + push(@mr_licenses, "BSD"); } } if ($licensetext =~ /Mozilla Public License Version ([^ ]+)/) { $license = "MPL (v$1) $license"; + push(@mr_licenses, "MPL-$1"); } if ($licensetext =~ /Released under the terms of the Artistic License ([^ ]+)/) { $license = "Artistic (v$1) $license"; + push(@mr_licenses, "Artistic-$1"); } if ($licensetext =~ /is free software under the Artistic [Ll]icense/) { $license = "Artistic $license"; + push(@mr_licenses, "Artistic"); } if ($licensetext =~ /This program is free software; you can redistribute it and\/or modify it under the same terms as Perl itself/) { $license = "Perl $license"; + push(@mr_licenses, "Perl"); } if ($licensetext =~ /under the Apache License, Version ([^ ]+) \(the License\)/) { $license = "Apache (v$1) $license"; + push(@mr_licenses, "Apache-$1"); } if ($licensetext =~ /This source file is subject to 
version ([^ ]+) of the PHP license/) { $license = "PHP (v$1) $license"; + push(@mr_licenses, "other (PHP-$1) (problematic if not applying to PHP itself)"); } if ($licensetext =~ /under the terms of the CeCILL /) { $license = "CeCILL $license"; + push(@mr_licenses, "other (CeCILL)"); } if ($licensetext =~ /under the terms of the CeCILL-([^ ]+) /) { $license = "CeCILL-$1 $license"; + push(@mr_licenses, "other (CeCILL-$1)"); } if ($licensetext =~ /under the SGI Free Software License B/) { $license = "SGI Free Software License B $license"; + push(@mr_licenses, "other (SGI Free Software License B)"); } if ($licensetext =~ /is in the public domain/i) { $license = "Public domain"; + push(@mr_licenses, "other (PD (should be stated in the copyright field))"); } if ($licensetext =~ /terms of the Common Development and Distribution License(, Version ([^(]+))? \(the License\)/) { $license = "CDDL " . ($1 ? "(v$2) " : '') . $license; + push(@mr_licenses, "CDDL".($1 ? "-$2" : '')); } if ($licensetext =~ /Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license \(the \"Software\"\)/ or $licensetext =~ /Boost Software License([ ,-]+Version ([^ ]+)?(\.))/i) { $license = "BSL " . ($1 ? "(v$2) " : '') . $license; + push(@mr_licenses, "other (BSL".($1 ? "-$2" : '').")"); } if ($licensetext =~ /PYTHON SOFTWARE FOUNDATION LICENSE (VERSION ([^ ]+))/i) { $license = "PSF " . ($1 ? "(v$2) " : '') . $license; + push(@mr_licenses, "PSF" . ($1 ? 
"-$2" : '')); } if ($licensetext =~ /The origin of this software must not be misrepresented.*Altered source versions must be plainly marked as such.*This notice may not be removed or altered from any source distribution/ or $licensetext =~ /see copyright notice in zlib\.h/) { $license = "zlib/libpng $license"; + push(@mr_licenses, "ZLIB"); } - if ($licensetext =~ /Do What The Fuck You Want To Public License, Version ([^, ]+)/i) { - $license = "WTFPL (v$1)"; - } + $license = "UNKNOWN" if (!length($license)); - if ($licensetext =~ /Do what The Fuck You Want To Public License/i) { - $license = "WTFPL"; + if ($opt_dep5) { + return join(" | ", @mr_licenses) if @mr_licenses; } - if ($licensetext =~ /(License WTFPL|Under (the|a) WTFPL)/i) { - $license = "WTFPL"; - } + return $license; +} - $license = "UNKNOWN" if (!length($license)); +sub additem(@) { +# Adds a reference to a subarray (filenames, copyright, license) to the array +# debcori if the current copyright+license combination has not yet been seen. +# Otherwise just adds the filename to the appropriate filenames-element. + + my $l = scalar(@debcori); + my $samecrlicexists = 0; + + for (my $i=0; $i<$l; $i++) { + + if (($debcori[$i]->[1] eq $_[1]) and ($debcori[$i]->[2] eq $_[2])) { + # identical copyright line exists already + $debcori[$i]->[0] = join(", ",($debcori[$i]->[0], $_[0])); + $samecrlicexists=1; + } + } + + if ($samecrlicexists == 0) { + # no identical copyright line exists, thus appending new array as reference + push(@debcori, \...@_); + } - return $license; } sub fatal($) {