This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git


The following commit(s) were added to refs/heads/master by this push:
     new 3bf874e2 Optimize memory allocation
3bf874e2 is described below

commit 3bf874e2141dc08550c0b330c7a7006f358bb0f0
Author: Gary Gregory <garydgreg...@gmail.com>
AuthorDate: Mon Mar 18 09:27:34 2024 -0400

    Optimize memory allocation
    
    Add org.apache.commons.codec.language.bm.Rule.PhonemeExpr.size()
---
 pom.xml                                            |  6 +--
 src/changes/changes.xml                            |  3 +-
 .../commons/codec/language/bm/PhoneticEngine.java  | 55 +++++++++++-----------
 .../org/apache/commons/codec/language/bm/Rule.java | 27 +++++++++--
 .../codec/language/bm/PhoneticEngineTest.java      |  6 ++-
 5 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/pom.xml b/pom.xml
index 69f9392a..4a42ae67 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,7 +28,7 @@ limitations under the License.
   </parent>
   <groupId>commons-codec</groupId>
   <artifactId>commons-codec</artifactId>
-  <version>1.16.2-SNAPSHOT</version>
+  <version>1.17.0-SNAPSHOT</version>
   <name>Apache Commons Codec</name>
   <inceptionYear>2002</inceptionYear>
   <description>
@@ -273,9 +273,9 @@ limitations under the License.
     <checkstyle.config.file>${basedir}/src/conf/checkstyle.xml</checkstyle.config.file>
     <jacoco.skip>false</jacoco.skip>
     <!-- Commons Release Plugin -->
-    <commons.release.version>1.16.1</commons.release.version>
+    <commons.release.version>1.17.0</commons.release.version>
     <commons.bc.version>1.16.0</commons.bc.version>
-    <commons.bc.next>1.16.2</commons.bc.next>
+    <commons.bc.next>1.17.1</commons.bc.next>
     <commons.rc.version>RC1</commons.rc.version>
     <commons.release.isDistModule>true</commons.release.isDistModule>
     <commons.distSvnStagingUrl>scm:svn:https://dist.apache.org/repos/dist/dev/commons/${commons.componentid}</commons.distSvnStagingUrl>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 7180ce7d..bdb6e434 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -43,8 +43,9 @@ The <action> type attribute can be add,update,fix,remove.
     <author>Apache Commons Developers</author>
   </properties>
   <body>
-    <release version="1.16.2" date="2024-MM-DD" description="Feature and fix release. Requires a minimum of Java 8.">
+    <release version="1.17.0" date="YYYY-MM-DD" description="Feature and fix release. Requires a minimum of Java 8.">
       <!-- ADD -->
+      <action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.codec.language.bm.Rule.PhonemeExpr.size().</action>
       <!-- FIX -->
       <!-- UPDATE -->
       <action                   dev="ggregory" type="update" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-parent from 66 to 67 #250.</action>
diff --git a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
index 3621487d..84fbe351 100644
--- a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
+++ b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
@@ -106,9 +106,8 @@ public class PhoneticEngine {
          * @param maxPhonemes   the maximum number of phonemes to build up
          */
         public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
-            final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<>(maxPhonemes);
-
-            EXPR: for (final Rule.Phoneme left : this.phonemes) {
+            final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<>(Math.min(phonemes.size() * phonemeExpr.size(), maxPhonemes));
+            EXPR: for (final Rule.Phoneme left : phonemes) {
                 for (final Rule.Phoneme right : phonemeExpr.getPhonemes()) {
                     final LanguageSet languages = left.getLanguages().restrictTo(right.getLanguages());
                     if (!languages.isEmpty()) {
@@ -122,9 +121,8 @@ public class PhoneticEngine {
                     }
                 }
             }
-
-            this.phonemes.clear();
-            this.phonemes.addAll(newPhonemes);
+            phonemes.clear();
+            phonemes.addAll(newPhonemes);
         }
 
         /**
@@ -133,7 +131,7 @@ public class PhoneticEngine {
          * @return  the phoneme set
          */
         public Set<Rule.Phoneme> getPhonemes() {
-            return this.phonemes;
+            return phonemes;
         }
 
         /**
@@ -155,22 +153,24 @@ public class PhoneticEngine {
      * processed already), and {@code found} indicates if a matching rule was found or not. In the case where a
      * matching rule was found, {@code phonemeBuilder} is replaced with a new builder containing the phonemes
      * updated by the matching rule.
-     *
+     * <p>
      * Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads
      * as it is constructed as needed by the calling methods.
+     * </p>
+     *
      * @since 1.6
      */
     private static final class RulesApplication {
+
         private final Map<String, List<Rule>> finalRules;
         private final CharSequence input;
-
         private final PhonemeBuilder phonemeBuilder;
         private int i;
         private final int maxPhonemes;
         private boolean found;
 
-        public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input,
-                                final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) {
+        public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input, final PhonemeBuilder phonemeBuilder, final int i,
+                final int maxPhonemes) {
             Objects.requireNonNull(finalRules, "finalRules");
             this.finalRules = finalRules;
             this.phonemeBuilder = phonemeBuilder;
@@ -180,11 +180,11 @@ public class PhoneticEngine {
         }
 
         public int getI() {
-            return this.i;
+            return i;
         }
 
         public PhonemeBuilder getPhonemeBuilder() {
-            return this.phonemeBuilder;
+            return phonemeBuilder;
         }
 
         /**
@@ -195,31 +195,31 @@ public class PhoneticEngine {
          * @return {@code this}
          */
         public RulesApplication invoke() {
-            this.found = false;
+            found = false;
             int patternLength = 1;
-            final List<Rule> rules = this.finalRules.get(input.subSequence(i, i + patternLength));
+            final List<Rule> rules = finalRules.get(input.subSequence(i, i + patternLength));
             if (rules != null) {
                 for (final Rule rule : rules) {
                     final String pattern = rule.getPattern();
                     patternLength = pattern.length();
-                    if (rule.patternAndContextMatches(this.input, this.i)) {
-                        this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
-                        this.found = true;
+                    if (rule.patternAndContextMatches(input, i)) {
+                        phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
+                        found = true;
                         break;
                     }
                 }
             }
 
-            if (!this.found) {
+            if (!found) {
                 patternLength = 1;
             }
 
-            this.i += patternLength;
+            i += patternLength;
             return this;
         }
 
         public boolean isFound() {
-            return this.found;
+            return found;
         }
     }
 
@@ -269,11 +269,11 @@ public class PhoneticEngine {
      *            the type of names it will use
      * @param ruleType
      *            the type of rules it will apply
-     * @param concat
+     * @param concatenate
      *            if it will concatenate multiple encodings
      */
-    public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat) {
-        this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES);
+    public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concatenate) {
+        this(nameType, ruleType, concatenate, DEFAULT_MAX_PHONEMES);
     }
 
     /**
@@ -283,20 +283,19 @@ public class PhoneticEngine {
      *            the type of names it will use
      * @param ruleType
      *            the type of rules it will apply
-     * @param concat
+     * @param concatenate
      *            if it will concatenate multiple encodings
      * @param maxPhonemes
      *            the maximum number of phonemes that will be handled
      * @since 1.7
      */
-    public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat,
-                          final int maxPhonemes) {
+    public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concatenate, final int maxPhonemes) {
         if (ruleType == RuleType.RULES) {
             throw new IllegalArgumentException("ruleType must not be " + RuleType.RULES);
         }
         this.nameType = nameType;
         this.ruleType = ruleType;
-        this.concat = concat;
+        this.concat = concatenate;
         this.lang = Lang.instance(nameType);
         this.maxPhonemes = maxPhonemes;
     }
diff --git a/src/main/java/org/apache/commons/codec/language/bm/Rule.java b/src/main/java/org/apache/commons/codec/language/bm/Rule.java
index 641719e7..728e4bd0 100644
--- a/src/main/java/org/apache/commons/codec/language/bm/Rule.java
+++ b/src/main/java/org/apache/commons/codec/language/bm/Rule.java
@@ -166,6 +166,11 @@ public class Rule {
           return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang));
         }
 
+        @Override
+        public int size() {
+            return 1;
+        }
+
         @Override
         public String toString() {
           return phonemeText.toString() + "[" + languages + "]";
@@ -174,19 +179,35 @@ public class Rule {
 
     public interface PhonemeExpr {
         Iterable<Phoneme> getPhonemes();
+
+        /**
+         * Gets the expression size in phonemes.
+         *
+         * @return the expression size in phonemes.
+         * @since 1.17.0
+         */
+        default int size() {
+            // All implementations are int-bound.
+            return (int) Math.min(getPhonemes().spliterator().getExactSizeIfKnown(), Integer.MAX_VALUE);
+        }
     }
 
     public static final class PhonemeList implements PhonemeExpr {
 
-        private final List<Phoneme> phonemes;
+        private final List<Phoneme> phonemeList;
 
         public PhonemeList(final List<Phoneme> phonemes) {
-            this.phonemes = phonemes;
+            this.phonemeList = phonemes;
         }
 
         @Override
         public List<Phoneme> getPhonemes() {
-            return this.phonemes;
+            return phonemeList;
+        }
+
+        @Override
+        public int size() {
+            return phonemeList.size();
         }
     }
 
diff --git a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
index b3eddb94..7c949f17 100644
--- a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
+++ b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
@@ -43,7 +43,8 @@ public class PhoneticEngineTest {
                 Arguments.of("SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
                 Arguments.of("d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, Boolean.TRUE, TEN),
                 Arguments.of("van helsing", "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
-                Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN)
+                Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, TEN),
+                Arguments.of("Judenburg", "iudnbYrk|iudnbirk|iudnburk|xudnbirk|xudnburk|zudnbirk|zudnburk", NameType.GENERIC, RuleType.APPROX, Boolean.TRUE, Integer.MAX_VALUE)
                 );
         // @formatter:on
     }
@@ -54,7 +55,8 @@ public class PhoneticEngineTest {
                 Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
                 Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
                 Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
-                Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
+                Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
+                Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, Integer.MAX_VALUE)
                 );
         // @formatter:on
     }

Reply via email to