dweiss commented on code in PR #14246: URL: https://github.com/apache/lucene/pull/14246#discussion_r1957375830
########## lucene/analysis/common/build.gradle: ########## @@ -24,51 +26,104 @@ dependencies { moduleTestImplementation project(':lucene:test-framework') } -// Fetch the data and enable regression tests against woorm/ libreoffice dictionaries. -task checkoutHunspellRegressionRepos() { - ext { - checkoutDir = file("${buildDir}/hunspell-regressions") - } +// Enable Hunspell tests against LibreOffice/ Woorm dictionaries. We pull +// these dictionaries dynamically from git of each respective project. To keep +// things consistent across pull requests/ re-runs, we use a fixed git commit +// for each project ({@linkplain https://github.com/apache/lucene/issues/14235 #14235}), +// with a periodic workflow running against the latest commit on each +// project's respective development branch. + +// A gradle property with the parent directory for all git clones for each project. +def cloneDirProperty = project.layout.buildDirectory.dir("hunspell-regressions") - outputs.dir checkoutDir - doFirst { - // Clone the repositories we need if they don't exist. +// The list of dictionary projects to pull/ check against. Also includes +// a full commit reference for each project. These should be updated +// from time to time based on what's available at the head reference. +def dictionaryProjects = [ [ - "libreoffice": "https://github.com/LibreOffice/dictionaries", - "woorm": "https://github.com/wooorm/dictionaries" - ].each { name, repo -> - if (!file("${checkoutDir}/${name}").exists()) { - checkoutDir.mkdirs() - // This will work only if git is available, but I assume it is. - project.exec { + "name": "libreoffice", + "url": "https://github.com/LibreOffice/dictionaries", + "ref": "762abe74008b94b2ff06db6f4024b59a8254c467" // head: master + ], + [ + "name": "woorm", + "url": "https://github.com/wooorm/dictionaries", + "ref": "8cfea406b505e4d7df52d5a19bce525df98c54ab" // head: main + ] +] Review Comment: This will need to be updated by hand, occasionally. 
The frequency of changes in those repositories isn't that great so no problem there, I think? ########## lucene/core/src/java/module-info.java: ########## @@ -63,6 +63,14 @@ // Open certain packages for the test framework (ram usage tester). opens org.apache.lucene.document to org.apache.lucene.test_framework; + opens org.apache.lucene.util.fst to + org.apache.lucene.test_framework; + opens org.apache.lucene.store to + org.apache.lucene.test_framework; + opens org.apache.lucene.util.automaton to + org.apache.lucene.test_framework; + opens org.apache.lucene.util to + org.apache.lucene.test_framework; Review Comment: Had to add these to make ram testing happy now that the tests run in module mode. ########## .github/workflows/run-scheduled-hunspell.yml: ########## @@ -0,0 +1,32 @@ +name: "Run scheduled checks: Hunspell tests against latest dictionaries" + +on: + workflow_dispatch: + + schedule: + # 4:13 on Mondays + - cron: '13 4 * * 1' + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + +jobs: + test: + name: Hunspell regression tests against latest dictionaries + timeout-minutes: 15 + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/prepare-for-build + + - name: Run Hunspell regression tests against latest commits in dictionary repositories + run: > + ./gradlew -p lucene/analysis/common + -Ptests.hunspell.regressions=true + -Ptests.verbose=true + -Ptests.hunspell.libreoffice.ref=master + -Ptests.hunspell.woorm.ref=main Review Comment: Here we pick the 'latest' commit from both branches for the scheduled workflow. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org