dweiss commented on a change in pull request #178: URL: https://github.com/apache/lucene/pull/178#discussion_r648973848
########## File path: gradle/validation/rat-sources.gradle ########## @@ -27,139 +28,122 @@ configure(rootProject) { } } +// Configure the rat validation task and all scanned directories. allprojects { task("rat", type: RatTask) { group = 'Verification' description = 'Runs Apache Rat checks.' - } -} - -configure(rootProject) { - rat { - includes += [ - "buildSrc/**/*.java", - "gradle/**/*.gradle", - "lucene/tools/forbiddenApis/**", - "lucene/tools/prettify/**", - ] - excludes += [ - // Unclear if this needs ASF header, depends on how much was copied from ElasticSearch - "**/ErrorReportingTestListener.java" - ] - } -} - -configure(project(":lucene:analysis:common")) { - rat { - srcExcludes += [ - "**/*.aff", - "**/*.dic", - "**/*.wrong", - "**/*.good", - "**/*.sug", - "**/charfilter/*.htm*", - "**/*LuceneResourcesWikiPage.html" - ] - } -} - -configure(project(":lucene:analysis:kuromoji")) { - rat { - srcExcludes += [ - // whether rat detects this as binary or not is platform dependent?! - "**/bocchan.utf-8" - ] - } -} -configure(project(":lucene:analysis:opennlp")) { - rat { - excludes += [ - "src/tools/test-model-data/*.txt", - ] - } -} - -configure(project(":lucene:highlighter")) { - rat { - srcExcludes += [ - "**/CambridgeMA.utf8" - ] - } -} - -configure(project(":lucene:suggest")) { - rat { - srcExcludes += [ - "**/Top50KWiki.utf8", - "**/stop-snowball.txt" - ] + def defaultScanFileTree = project.fileTree(projectDir, { + // Don't check under the project's build folder. + exclude project.buildDir.name + + // Exclude any generated stuff. + exclude "src/generated" + + // Don't check any of the subprojects - they have their own rat tasks. + exclude subprojects.collect { it.projectDir.name } + + // At the module scope we only check selected file patterns as folks have various .gitignore-d resources + // generated by IDEs, etc. 
+ include "**/*.gradle" + include "**/*.xml" + include "**/*.md" + include "**/*.py" + include "**/*.sh" + include "**/*.bat" + + // Include selected patterns from any source folders. We could make this + // relative to source sets but it seems to be of little value - all our source sets + // live under 'src' anyway. + include "src/**" + exclude "src/**/*.png" + exclude "src/**/*.txt" + exclude "src/**/*.zip" + exclude "src/**/*.properties" + exclude "src/**/*.utf8" + + // Conditionally apply module-specific patterns. We do it here instead + // of reconfiguring each project because the provider can be made lazy + // and it's easier to manage this way. + switch (project.path) { + case ":": + include "gradlew" + include "gradlew.bat" + exclude ".gradle" + exclude ".idea" + exclude ".muse" + + // The root project also includes patterns for the boostrap (buildSrc) and composite + // projects. Include their sources in the scan. + include "buildSrc/src/**" + include "dev-tools/missing-doclet/src/**" + break + + case ":lucene:analysis:morfologik": + exclude "src/**/*.info" + exclude "src/**/*.input" + break + + case ":lucene:analysis:opennlp": + exclude "src/**/en-test-lemmas.dict" + break + + case ":lucene:test-framework": + exclude "src/**/europarl.lines.txt.seek" + break + + case ":lucene:analysis:common": + exclude "src/**/*.aff" + exclude "src/**/*.dic" + exclude "src/**/*.good" + exclude "src/**/*.sug" + exclude "src/**/*.wrong" + exclude "src/**/charfilter/*.htm*" + exclude "src/**/*LuceneResourcesWikiPage.html" + exclude "src/**/*.rslp" + break + + case ":lucene:benchmark": + exclude "data/" + break + } + }) + inputFileTrees.add(defaultScanFileTree) } } -// Structure inspired by existing task from Apache Kafka, heavily modified since then. +/** + * An Apache RAT adapter that validates whether files contain acceptable licenses. 
+ */ class RatTask extends DefaultTask { - @Input - List<String> includes = [ - "*.gradle", - "*.xml", - "src/tools/**" - ] - - @Input - List<String> excludes = [] - - @Input - List<String> srcExcludes = [ - "**/TODO", - "**/*.txt", - "**/*.md", - "**/*.iml", - "build/**" - ] + @InputFiles + ListProperty<ConfigurableFileTree> inputFileTrees = project.objects.listProperty(ConfigurableFileTree) Review comment: This is intentionally left as a list of file trees. We only use a single file tree for now, but a list may be useful in the future if we need multiple file trees as inputs. ########## File path: gradle/validation/rat-sources.gradle ########## @@ -27,139 +28,122 @@ configure(rootProject) { } } +// Configure the rat validation task and all scanned directories. allprojects { task("rat", type: RatTask) { group = 'Verification' description = 'Runs Apache Rat checks.' - } -} - -configure(rootProject) { - rat { - includes += [ - "buildSrc/**/*.java", - "gradle/**/*.gradle", - "lucene/tools/forbiddenApis/**", - "lucene/tools/prettify/**", - ] - excludes += [ - // Unclear if this needs ASF header, depends on how much was copied from ElasticSearch - "**/ErrorReportingTestListener.java" - ] - } -} - -configure(project(":lucene:analysis:common")) { - rat { - srcExcludes += [ - "**/*.aff", - "**/*.dic", - "**/*.wrong", - "**/*.good", - "**/*.sug", - "**/charfilter/*.htm*", - "**/*LuceneResourcesWikiPage.html" - ] - } -} - -configure(project(":lucene:analysis:kuromoji")) { - rat { - srcExcludes += [ - // whether rat detects this as binary or not is platform dependent?! 
- "**/bocchan.utf-8" - ] - } -} -configure(project(":lucene:analysis:opennlp")) { - rat { - excludes += [ - "src/tools/test-model-data/*.txt", - ] - } -} - -configure(project(":lucene:highlighter")) { - rat { - srcExcludes += [ - "**/CambridgeMA.utf8" - ] - } -} - -configure(project(":lucene:suggest")) { - rat { - srcExcludes += [ - "**/Top50KWiki.utf8", - "**/stop-snowball.txt" - ] + def defaultScanFileTree = project.fileTree(projectDir, { + // Don't check under the project's build folder. + exclude project.buildDir.name + + // Exclude any generated stuff. + exclude "src/generated" + + // Don't check any of the subprojects - they have their own rat tasks. + exclude subprojects.collect { it.projectDir.name } + + // At the module scope we only check selected file patterns as folks have various .gitignore-d resources + // generated by IDEs, etc. + include "**/*.gradle" + include "**/*.xml" + include "**/*.md" + include "**/*.py" + include "**/*.sh" + include "**/*.bat" + + // Include selected patterns from any source folders. We could make this + // relative to source sets but it seems to be of little value - all our source sets + // live under 'src' anyway. + include "src/**" + exclude "src/**/*.png" + exclude "src/**/*.txt" + exclude "src/**/*.zip" + exclude "src/**/*.properties" + exclude "src/**/*.utf8" + + // Conditionally apply module-specific patterns. We do it here instead + // of reconfiguring each project because the provider can be made lazy + // and it's easier to manage this way. + switch (project.path) { + case ":": + include "gradlew" + include "gradlew.bat" + exclude ".gradle" + exclude ".idea" + exclude ".muse" + + // The root project also includes patterns for the boostrap (buildSrc) and composite + // projects. Include their sources in the scan. 
+ include "buildSrc/src/**" + include "dev-tools/missing-doclet/src/**" + break + + case ":lucene:analysis:morfologik": + exclude "src/**/*.info" + exclude "src/**/*.input" + break + + case ":lucene:analysis:opennlp": + exclude "src/**/en-test-lemmas.dict" + break + + case ":lucene:test-framework": + exclude "src/**/europarl.lines.txt.seek" + break + + case ":lucene:analysis:common": + exclude "src/**/*.aff" + exclude "src/**/*.dic" + exclude "src/**/*.good" + exclude "src/**/*.sug" + exclude "src/**/*.wrong" + exclude "src/**/charfilter/*.htm*" + exclude "src/**/*LuceneResourcesWikiPage.html" + exclude "src/**/*.rslp" + break + + case ":lucene:benchmark": + exclude "data/" + break + } + }) + inputFileTrees.add(defaultScanFileTree) } } -// Structure inspired by existing task from Apache Kafka, heavily modified since then. +/** + * An Apache RAT adapter that validates whether files contain acceptable licenses. + */ class RatTask extends DefaultTask { - @Input - List<String> includes = [ - "*.gradle", - "*.xml", - "src/tools/**" - ] - - @Input - List<String> excludes = [] - - @Input - List<String> srcExcludes = [ - "**/TODO", - "**/*.txt", - "**/*.md", - "**/*.iml", - "build/**" - ] + @InputFiles + ListProperty<ConfigurableFileTree> inputFileTrees = project.objects.listProperty(ConfigurableFileTree) @OutputFile - def xmlReport = new File(new File(project.buildDir, 'rat'), 'rat-report.xml') + RegularFileProperty xmlReport = project.objects.fileProperty().convention( + project.layout.buildDirectory.file("rat/rat-report.xml")) - def generateXmlReport() { + def generateReport(File reportFile) { + // Set up ant rat task. 
def uri = 'antlib:org.apache.rat.anttasks' def ratClasspath = project.rootProject.configurations.ratDeps.asPath ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, classpath: ratClasspath) - def rat = NamespaceBuilder.newInstance(ant, uri) - rat.report(format: 'xml', reportFile: xmlReport, addDefaultLicenseMatchers: true) { - ant.fileset(dir: "${project.projectDir}") { - includes.each { pattern -> ant.include(name: pattern) } - excludes.each { pattern -> ant.exclude(name: pattern) } - } - if (project.plugins.findPlugin(JavaPlugin)) { - def checkSets = [ - project.sourceSets.main.java.srcDirs, - project.sourceSets.test.java.srcDirs, - ] - - project.sourceSets.matching { it.name == 'tools' }.all { - checkSets += project.sourceSets.tools.java.srcDirs - } - - checkSets.flatten().each { srcLocation -> - ant.fileset(dir: srcLocation, erroronmissingdir: false) { - srcExcludes.each { pattern -> ant.exclude(name: pattern) } - } - } - - [ - project.sourceSets.main.resources.srcDirs - ].flatten().each { srcLocation -> - ant.fileset(dir: srcLocation, erroronmissingdir: false) { - ant.include(name: "META-INF/**") - } - } + // Collect all output files for debugging. + String inputFileList = inputFileTrees.get().collectMany { fileTree -> + fileTree.asList() + }.sort().join("\n") + project.file(reportFile.path.replaceAll('.xml$', '-filelist.txt')).setText(inputFileList, "UTF-8") Review comment: This generates a list of the processed files and writes it as a sibling of the rat report file, which makes it easy to see what was actually included in the check. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org