[ https://issues.apache.org/jira/browse/LUCENE-9283?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17062418#comment-17062418 ]
Alan Woodward commented on LUCENE-9283: --------------------------------------- Straightforward diff here: {code:java} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index be81c0acab2..f1ac4426a58 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -62,6 +62,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ValidatingTokenFilter; +import org.apache.lucene.analysis.boost.DelimitedBoostTokenFilter; import org.apache.lucene.analysis.charfilter.NormalizeCharMap; import org.apache.lucene.analysis.cjk.CJKBigramFilter; import org.apache.lucene.analysis.commongrams.CommonGramsFilter; @@ -198,6 +199,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase { WordDelimiterGraphFilter.class, // requires a special encoded token value, so it may fail with random data: DelimitedTermFrequencyTokenFilter.class, + // requires a special encoded token value, so it may fail with random data: + DelimitedBoostTokenFilter.class, // clones of core's filters: org.apache.lucene.analysis.core.StopFilter.class, org.apache.lucene.analysis.core.LowerCaseFilter.class)) { {code} > DelimitedBoostTokenFilter can fail testRandomChains > --------------------------------------------------- > > Key: LUCENE-9283 > URL: https://issues.apache.org/jira/browse/LUCENE-9283 > Project: Lucene - Core > Issue Type: Bug > Reporter: Alan Woodward > Assignee: Alan Woodward > Priority: Major > > DelimitedBoostTokenFilter expects tokens of the form `token` or > `token|number` and throws a NumberFormatException if the `number` part can't > be parsed. 
This can cause test failures when we build random chains and > throw random data through them. > We can either exclude DelimitedBoostTokenFilter when building a random > analyzer, or add a flag to ignore badly-formed tokens. I lean towards doing > the former, as I don't really want to make leniency the default here. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org