This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push: new 3bf874e2 Optimize memory allocation 3bf874e2 is described below commit 3bf874e2141dc08550c0b330c7a7006f358bb0f0 Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Mon Mar 18 09:27:34 2024 -0400 Optimize memory allocation Add org.apache.commons.codec.language.bm.Rule.PhonemeExpr.size() --- pom.xml | 6 +-- src/changes/changes.xml | 3 +- .../commons/codec/language/bm/PhoneticEngine.java | 55 +++++++++++----------- .../org/apache/commons/codec/language/bm/Rule.java | 27 +++++++++-- .../codec/language/bm/PhoneticEngineTest.java | 6 ++- 5 files changed, 60 insertions(+), 37 deletions(-) diff --git a/pom.xml b/pom.xml index 69f9392a..4a42ae67 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ limitations under the License. </parent> <groupId>commons-codec</groupId> <artifactId>commons-codec</artifactId> - <version>1.16.2-SNAPSHOT</version> + <version>1.17.0-SNAPSHOT</version> <name>Apache Commons Codec</name> <inceptionYear>2002</inceptionYear> <description> @@ -273,9 +273,9 @@ limitations under the License. <checkstyle.config.file>${basedir}/src/conf/checkstyle.xml</checkstyle.config.file> <jacoco.skip>false</jacoco.skip> <!-- Commons Release Plugin --> - <commons.release.version>1.16.1</commons.release.version> + <commons.release.version>1.17.0</commons.release.version> <commons.bc.version>1.16.0</commons.bc.version> - <commons.bc.next>1.16.2</commons.bc.next> + <commons.bc.next>1.17.1</commons.bc.next> <commons.rc.version>RC1</commons.rc.version> <commons.release.isDistModule>true</commons.release.isDistModule> <commons.distSvnStagingUrl>scm:svn:https://dist.apache.org/repos/dist/dev/commons/${commons.componentid}</commons.distSvnStagingUrl> diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 7180ce7d..bdb6e434 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -43,8 +43,9 @@ The <action> type attribute can be add,update,fix,remove. 
<author>Apache Commons Developers</author> </properties> <body> - <release version="1.16.2" date="2024-MM-DD" description="Feature and fix release. Requires a minimum of Java 8."> + <release version="1.17.0" date="YYYY-MM-DD" description="Feature and fix release. Requires a minimum of Java 8."> <!-- ADD --> + <action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.codec.language.bm.Rule.PhonemeExpr.size().</action> <!-- FIX --> <!-- UPDATE --> <action dev="ggregory" type="update" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-parent from 66 to 67 #250.</action> diff --git a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java index 3621487d..84fbe351 100644 --- a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java +++ b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java @@ -106,9 +106,8 @@ public class PhoneticEngine { * @param maxPhonemes the maximum number of phonemes to build up */ public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) { - final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<>(maxPhonemes); - - EXPR: for (final Rule.Phoneme left : this.phonemes) { + final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<>(Math.min(phonemes.size() * phonemeExpr.size(), maxPhonemes)); + EXPR: for (final Rule.Phoneme left : phonemes) { for (final Rule.Phoneme right : phonemeExpr.getPhonemes()) { final LanguageSet languages = left.getLanguages().restrictTo(right.getLanguages()); if (!languages.isEmpty()) { @@ -122,9 +121,8 @@ public class PhoneticEngine { } } } - - this.phonemes.clear(); - this.phonemes.addAll(newPhonemes); + phonemes.clear(); + phonemes.addAll(newPhonemes); } /** @@ -133,7 +131,7 @@ public class PhoneticEngine { * @return the phoneme set */ public Set<Rule.Phoneme> getPhonemes() { - return this.phonemes; + return phonemes; } /** @@ -155,22 +153,24 @@ 
public class PhoneticEngine { * processed already), and {@code found} indicates if a matching rule was found or not. In the case where a * matching rule was found, {@code phonemeBuilder} is replaced with a new builder containing the phonemes * updated by the matching rule. - * + * <p> * Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads * as it is constructed as needed by the calling methods. + * </p> + * * @since 1.6 */ private static final class RulesApplication { + private final Map<String, List<Rule>> finalRules; private final CharSequence input; - private final PhonemeBuilder phonemeBuilder; private int i; private final int maxPhonemes; private boolean found; - public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input, - final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) { + public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input, final PhonemeBuilder phonemeBuilder, final int i, + final int maxPhonemes) { Objects.requireNonNull(finalRules, "finalRules"); this.finalRules = finalRules; this.phonemeBuilder = phonemeBuilder; @@ -180,11 +180,11 @@ public class PhoneticEngine { } public int getI() { - return this.i; + return i; } public PhonemeBuilder getPhonemeBuilder() { - return this.phonemeBuilder; + return phonemeBuilder; } /** @@ -195,31 +195,31 @@ public class PhoneticEngine { * @return {@code this} */ public RulesApplication invoke() { - this.found = false; + found = false; int patternLength = 1; - final List<Rule> rules = this.finalRules.get(input.subSequence(i, i + patternLength)); + final List<Rule> rules = finalRules.get(input.subSequence(i, i + patternLength)); if (rules != null) { for (final Rule rule : rules) { final String pattern = rule.getPattern(); patternLength = pattern.length(); - if (rule.patternAndContextMatches(this.input, this.i)) { - this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes); - 
this.found = true; + if (rule.patternAndContextMatches(input, i)) { + phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes); + found = true; break; } } } - if (!this.found) { + if (!found) { patternLength = 1; } - this.i += patternLength; + i += patternLength; return this; } public boolean isFound() { - return this.found; + return found; } } @@ -269,11 +269,11 @@ public class PhoneticEngine { * the type of names it will use * @param ruleType * the type of rules it will apply - * @param concat + * @param concatenate * if it will concatenate multiple encodings */ - public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat) { - this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES); + public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concatenate) { + this(nameType, ruleType, concatenate, DEFAULT_MAX_PHONEMES); } /** @@ -283,20 +283,19 @@ public class PhoneticEngine { * the type of names it will use * @param ruleType * the type of rules it will apply - * @param concat + * @param concatenate * if it will concatenate multiple encodings * @param maxPhonemes * the maximum number of phonemes that will be handled * @since 1.7 */ - public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat, - final int maxPhonemes) { + public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concatenate, final int maxPhonemes) { if (ruleType == RuleType.RULES) { throw new IllegalArgumentException("ruleType must not be " + RuleType.RULES); } this.nameType = nameType; this.ruleType = ruleType; - this.concat = concat; + this.concat = concatenate; this.lang = Lang.instance(nameType); this.maxPhonemes = maxPhonemes; } diff --git a/src/main/java/org/apache/commons/codec/language/bm/Rule.java b/src/main/java/org/apache/commons/codec/language/bm/Rule.java index 641719e7..728e4bd0 100644 --- a/src/main/java/org/apache/commons/codec/language/bm/Rule.java +++ 
b/src/main/java/org/apache/commons/codec/language/bm/Rule.java @@ -166,6 +166,11 @@ public class Rule { return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); } + @Override + public int size() { + return 1; + } + @Override public String toString() { return phonemeText.toString() + "[" + languages + "]"; @@ -174,19 +179,35 @@ public class Rule { public interface PhonemeExpr { Iterable<Phoneme> getPhonemes(); + + /** + * Gets the expression size in phonemes. + * + * @return the expression size in phonemes. + * @since 1.17.0 + */ + default int size() { + // All implementations are int-bound. + return (int) Math.min(getPhonemes().spliterator().getExactSizeIfKnown(), Integer.MAX_VALUE); + } } public static final class PhonemeList implements PhonemeExpr { - private final List<Phoneme> phonemes; + private final List<Phoneme> phonemeList; public PhonemeList(final List<Phoneme> phonemes) { - this.phonemes = phonemes; + this.phonemeList = phonemes; } @Override public List<Phoneme> getPhonemes() { - return this.phonemes; + return phonemeList; + } + + @Override + public int size() { + return phonemeList.size(); } } diff --git a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java index b3eddb94..7c949f17 100644 --- a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java +++ b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java @@ -43,7 +43,8 @@ public class PhoneticEngineTest { Arguments.of("SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN), Arguments.of("d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN), Arguments.of("van helsing", "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN), - Arguments.of("Judenburg", 
"iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN) + Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN), + Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, Integer.MAX_VALUE) ); // @formatter:on } @@ -54,7 +55,8 @@ public class PhoneticEngineTest { Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN), Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN), Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN), - Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN) + Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN), + Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, Integer.MAX_VALUE) ); // @formatter:on }