dweiss commented on a change in pull request #178:
URL: https://github.com/apache/lucene/pull/178#discussion_r648973848



##########
File path: gradle/validation/rat-sources.gradle
##########
@@ -27,139 +28,122 @@ configure(rootProject) {
     }
 }
 
+// Configure the rat validation task and all scanned directories.
 allprojects {
     task("rat", type: RatTask) {
         group = 'Verification'
         description = 'Runs Apache Rat checks.'
-    }
-}
-
-configure(rootProject) {
-    rat {
-        includes += [
-            "buildSrc/**/*.java",
-            "gradle/**/*.gradle",
-            "lucene/tools/forbiddenApis/**",
-            "lucene/tools/prettify/**",
-        ]
-        excludes += [
-            // Unclear if this needs ASF header, depends on how much was 
copied from ElasticSearch
-            "**/ErrorReportingTestListener.java"
-        ]
-    }
-}
-
-configure(project(":lucene:analysis:common")) {
-    rat {
-        srcExcludes += [
-            "**/*.aff",
-            "**/*.dic",
-            "**/*.wrong",
-            "**/*.good",
-            "**/*.sug",
-            "**/charfilter/*.htm*",
-            "**/*LuceneResourcesWikiPage.html"
-        ]
-    }
-}
-
-configure(project(":lucene:analysis:kuromoji")) {
-    rat {
-        srcExcludes += [
-            // whether rat detects this as binary or not is platform 
dependent?!
-            "**/bocchan.utf-8"
-        ]
-    }
-}
 
-configure(project(":lucene:analysis:opennlp")) {
-    rat {
-        excludes += [
-            "src/tools/test-model-data/*.txt",
-        ]
-    }
-}
-
-configure(project(":lucene:highlighter")) {
-    rat {
-        srcExcludes += [
-            "**/CambridgeMA.utf8"
-        ]
-    }
-}
-
-configure(project(":lucene:suggest")) {
-    rat {
-        srcExcludes += [
-            "**/Top50KWiki.utf8",
-            "**/stop-snowball.txt"
-        ]
+        def defaultScanFileTree = project.fileTree(projectDir, {
+            // Don't check under the project's build folder.
+            exclude project.buildDir.name
+
+            // Exclude any generated stuff.
+            exclude "src/generated"
+
+            // Don't check any of the subprojects - they have their own rat 
tasks.
+            exclude subprojects.collect { it.projectDir.name }
+
+            // At the module scope we only check selected file patterns as 
folks have various .gitignore-d resources
+            // generated by IDEs, etc.
+            include "**/*.gradle"
+            include "**/*.xml"
+            include "**/*.md"
+            include "**/*.py"
+            include "**/*.sh"
+            include "**/*.bat"
+
+            // Include selected patterns from any source folders. We could 
make this
+            // relative to source sets but it seems to be of little value - 
all our source sets
+            // live under 'src' anyway.
+            include "src/**"
+            exclude "src/**/*.png"
+            exclude "src/**/*.txt"
+            exclude "src/**/*.zip"
+            exclude "src/**/*.properties"
+            exclude "src/**/*.utf8"
+
+            // Conditionally apply module-specific patterns. We do it here 
instead
+            // of reconfiguring each project because the provider can be made 
lazy
+            // and it's easier to manage this way.
+            switch (project.path) {
+                case ":":
+                    include "gradlew"
+                    include "gradlew.bat"
+                    exclude ".gradle"
+                    exclude ".idea"
+                    exclude ".muse"
+
+                    // The root project also includes patterns for the 
bootstrap (buildSrc) and composite
+                    // projects. Include their sources in the scan.
+                    include "buildSrc/src/**"
+                    include "dev-tools/missing-doclet/src/**"
+                    break
+
+                case ":lucene:analysis:morfologik":
+                    exclude "src/**/*.info"
+                    exclude "src/**/*.input"
+                    break
+
+                case ":lucene:analysis:opennlp":
+                    exclude "src/**/en-test-lemmas.dict"
+                    break
+
+                case ":lucene:test-framework":
+                    exclude "src/**/europarl.lines.txt.seek"
+                    break
+
+                case ":lucene:analysis:common":
+                    exclude "src/**/*.aff"
+                    exclude "src/**/*.dic"
+                    exclude "src/**/*.good"
+                    exclude "src/**/*.sug"
+                    exclude "src/**/*.wrong"
+                    exclude "src/**/charfilter/*.htm*"
+                    exclude "src/**/*LuceneResourcesWikiPage.html"
+                    exclude "src/**/*.rslp"
+                    break
+
+                case ":lucene:benchmark":
+                    exclude "data/"
+                    break
+            }
+        })
+        inputFileTrees.add(defaultScanFileTree)
     }
 }
 
-// Structure inspired by existing task from Apache Kafka, heavily modified 
since then.
+/**
+ * An Apache RAT adapter that validates whether files contain acceptable 
licenses.
+ */
 class RatTask extends DefaultTask {
-    @Input
-    List<String> includes = [
-        "*.gradle",
-        "*.xml",
-        "src/tools/**"
-    ]
-
-    @Input
-    List<String> excludes = []
-
-    @Input
-    List<String> srcExcludes = [
-        "**/TODO",
-        "**/*.txt",
-        "**/*.md",
-        "**/*.iml",
-        "build/**"
-    ]
+    @InputFiles
+    ListProperty<ConfigurableFileTree> inputFileTrees = 
project.objects.listProperty(ConfigurableFileTree)

Review comment:
       This is intentionally left as a list of file trees. We only use a single 
file tree at the moment, but a list may be useful in the future if we need 
multiple file trees as input.

##########
File path: gradle/validation/rat-sources.gradle
##########
@@ -27,139 +28,122 @@ configure(rootProject) {
     }
 }
 
+// Configure the rat validation task and all scanned directories.
 allprojects {
     task("rat", type: RatTask) {
         group = 'Verification'
         description = 'Runs Apache Rat checks.'
-    }
-}
-
-configure(rootProject) {
-    rat {
-        includes += [
-            "buildSrc/**/*.java",
-            "gradle/**/*.gradle",
-            "lucene/tools/forbiddenApis/**",
-            "lucene/tools/prettify/**",
-        ]
-        excludes += [
-            // Unclear if this needs ASF header, depends on how much was 
copied from ElasticSearch
-            "**/ErrorReportingTestListener.java"
-        ]
-    }
-}
-
-configure(project(":lucene:analysis:common")) {
-    rat {
-        srcExcludes += [
-            "**/*.aff",
-            "**/*.dic",
-            "**/*.wrong",
-            "**/*.good",
-            "**/*.sug",
-            "**/charfilter/*.htm*",
-            "**/*LuceneResourcesWikiPage.html"
-        ]
-    }
-}
-
-configure(project(":lucene:analysis:kuromoji")) {
-    rat {
-        srcExcludes += [
-            // whether rat detects this as binary or not is platform 
dependent?!
-            "**/bocchan.utf-8"
-        ]
-    }
-}
 
-configure(project(":lucene:analysis:opennlp")) {
-    rat {
-        excludes += [
-            "src/tools/test-model-data/*.txt",
-        ]
-    }
-}
-
-configure(project(":lucene:highlighter")) {
-    rat {
-        srcExcludes += [
-            "**/CambridgeMA.utf8"
-        ]
-    }
-}
-
-configure(project(":lucene:suggest")) {
-    rat {
-        srcExcludes += [
-            "**/Top50KWiki.utf8",
-            "**/stop-snowball.txt"
-        ]
+        def defaultScanFileTree = project.fileTree(projectDir, {
+            // Don't check under the project's build folder.
+            exclude project.buildDir.name
+
+            // Exclude any generated stuff.
+            exclude "src/generated"
+
+            // Don't check any of the subprojects - they have their own rat 
tasks.
+            exclude subprojects.collect { it.projectDir.name }
+
+            // At the module scope we only check selected file patterns as 
folks have various .gitignore-d resources
+            // generated by IDEs, etc.
+            include "**/*.gradle"
+            include "**/*.xml"
+            include "**/*.md"
+            include "**/*.py"
+            include "**/*.sh"
+            include "**/*.bat"
+
+            // Include selected patterns from any source folders. We could 
make this
+            // relative to source sets but it seems to be of little value - 
all our source sets
+            // live under 'src' anyway.
+            include "src/**"
+            exclude "src/**/*.png"
+            exclude "src/**/*.txt"
+            exclude "src/**/*.zip"
+            exclude "src/**/*.properties"
+            exclude "src/**/*.utf8"
+
+            // Conditionally apply module-specific patterns. We do it here 
instead
+            // of reconfiguring each project because the provider can be made 
lazy
+            // and it's easier to manage this way.
+            switch (project.path) {
+                case ":":
+                    include "gradlew"
+                    include "gradlew.bat"
+                    exclude ".gradle"
+                    exclude ".idea"
+                    exclude ".muse"
+
+                    // The root project also includes patterns for the 
bootstrap (buildSrc) and composite
+                    // projects. Include their sources in the scan.
+                    include "buildSrc/src/**"
+                    include "dev-tools/missing-doclet/src/**"
+                    break
+
+                case ":lucene:analysis:morfologik":
+                    exclude "src/**/*.info"
+                    exclude "src/**/*.input"
+                    break
+
+                case ":lucene:analysis:opennlp":
+                    exclude "src/**/en-test-lemmas.dict"
+                    break
+
+                case ":lucene:test-framework":
+                    exclude "src/**/europarl.lines.txt.seek"
+                    break
+
+                case ":lucene:analysis:common":
+                    exclude "src/**/*.aff"
+                    exclude "src/**/*.dic"
+                    exclude "src/**/*.good"
+                    exclude "src/**/*.sug"
+                    exclude "src/**/*.wrong"
+                    exclude "src/**/charfilter/*.htm*"
+                    exclude "src/**/*LuceneResourcesWikiPage.html"
+                    exclude "src/**/*.rslp"
+                    break
+
+                case ":lucene:benchmark":
+                    exclude "data/"
+                    break
+            }
+        })
+        inputFileTrees.add(defaultScanFileTree)
     }
 }
 
-// Structure inspired by existing task from Apache Kafka, heavily modified 
since then.
+/**
+ * An Apache RAT adapter that validates whether files contain acceptable 
licenses.
+ */
 class RatTask extends DefaultTask {
-    @Input
-    List<String> includes = [
-        "*.gradle",
-        "*.xml",
-        "src/tools/**"
-    ]
-
-    @Input
-    List<String> excludes = []
-
-    @Input
-    List<String> srcExcludes = [
-        "**/TODO",
-        "**/*.txt",
-        "**/*.md",
-        "**/*.iml",
-        "build/**"
-    ]
+    @InputFiles
+    ListProperty<ConfigurableFileTree> inputFileTrees = 
project.objects.listProperty(ConfigurableFileTree)
 
     @OutputFile
-    def xmlReport = new File(new File(project.buildDir, 'rat'), 
'rat-report.xml')
+    RegularFileProperty xmlReport = project.objects.fileProperty().convention(
+        project.layout.buildDirectory.file("rat/rat-report.xml"))
 
-    def generateXmlReport() {
+    def generateReport(File reportFile) {
+        // Set up ant rat task.
         def uri = 'antlib:org.apache.rat.anttasks'
         def ratClasspath = project.rootProject.configurations.ratDeps.asPath
         ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, 
classpath: ratClasspath)
-
         def rat = NamespaceBuilder.newInstance(ant, uri)
-        rat.report(format: 'xml', reportFile: xmlReport, 
addDefaultLicenseMatchers: true) {
-            ant.fileset(dir: "${project.projectDir}") {
-                includes.each { pattern -> ant.include(name: pattern) }
-                excludes.each { pattern -> ant.exclude(name: pattern) }
-            }
 
-            if (project.plugins.findPlugin(JavaPlugin)) {
-                def checkSets = [
-                    project.sourceSets.main.java.srcDirs,
-                    project.sourceSets.test.java.srcDirs,
-                ]
-
-                project.sourceSets.matching { it.name == 'tools' }.all {
-                    checkSets += project.sourceSets.tools.java.srcDirs
-                }
-
-                checkSets.flatten().each { srcLocation ->
-                    ant.fileset(dir: srcLocation, erroronmissingdir: false) {
-                        srcExcludes.each { pattern -> ant.exclude(name: 
pattern) }
-                    }
-                }
-
-                [
-                    project.sourceSets.main.resources.srcDirs
-                ].flatten().each { srcLocation ->
-                    ant.fileset(dir: srcLocation, erroronmissingdir: false) {
-                        ant.include(name: "META-INF/**")
-                    }
-                }
+        // Collect all output files for debugging.
+        String inputFileList = inputFileTrees.get().collectMany { fileTree ->
+            fileTree.asList()
+        }.sort().join("\n")
+        project.file(reportFile.path.replaceAll('.xml$', 
'-filelist.txt')).setText(inputFileList, "UTF-8")

Review comment:
       This generates a list of the files that were processed and writes it next 
to the rat report file. It makes it easy to see what was actually included in 
the check.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to