rmuir commented on code in PR #14246: URL: https://github.com/apache/lucene/pull/14246#discussion_r1957377876
########## lucene/analysis/common/build.gradle: ########## @@ -24,51 +26,104 @@ dependencies { moduleTestImplementation project(':lucene:test-framework') } -// Fetch the data and enable regression tests against woorm/ libreoffice dictionaries. -task checkoutHunspellRegressionRepos() { - ext { - checkoutDir = file("${buildDir}/hunspell-regressions") - } +// Enable Hunspell tests against LibreOffice/ Woorm dictionaries. We pull +// these dictionaries dynamically from git of each respective project. To keep +// things consistent across pull requests/ re-runs, we use a fixed git commit +// for each project ({@linkplain https://github.com/apache/lucene/issues/14235 #14235}), +// with a periodic workflow running against the latest commit on each +// project's respective development branch. + +// A gradle property with the parent directory for all git clones for each project. +def cloneDirProperty = project.layout.buildDirectory.dir("hunspell-regressions") - outputs.dir checkoutDir - doFirst { - // Clone the repositories we need if they don't exist. +// The list of dictionary projects to pull/ check against. Also includes +// a full commit reference for each project. These should be updated +// from time to time based on what's available at the head reference. +def dictionaryProjects = [ [ - "libreoffice": "https://github.com/LibreOffice/dictionaries", - "woorm": "https://github.com/wooorm/dictionaries" - ].each { name, repo -> - if (!file("${checkoutDir}/${name}").exists()) { - checkoutDir.mkdirs() - // This will work only if git is available, but I assume it is. - project.exec { + "name": "libreoffice", + "url": "https://github.com/LibreOffice/dictionaries", + "ref": "762abe74008b94b2ff06db6f4024b59a8254c467" // head: master + ], + [ + "name": "woorm", + "url": "https://github.com/wooorm/dictionaries", + "ref": "8cfea406b505e4d7df52d5a19bce525df98c54ab" // head: main + ] +] Review Comment: yes, honestly any revision will work for the intended purpose of preventing regressions. If we want to discover new and interesting ways that dictionaries can be malformed in the wild that we should try to detect, that is different :) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org