This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git
The following commit(s) were added to refs/heads/master by this push:
new fa7c0755 introduce private methods to remove clones (#739)
fa7c0755 is described below
commit fa7c07550dd465a941081089d96ef99f9af8dec0
Author: aaaZayne <[email protected]>
AuthorDate: Fri Jun 12 21:12:35 2026 +1000
introduce private methods to remove clones (#739)
---
.../org/apache/commons/text/TextStringBuilder.java | 37 ++++--------
.../similarity/DamerauLevenshteinDistance.java | 70 ++++++++++------------
2 files changed, 43 insertions(+), 64 deletions(-)
diff --git a/src/main/java/org/apache/commons/text/TextStringBuilder.java b/src/main/java/org/apache/commons/text/TextStringBuilder.java
index eda1b31d..a489857b 100644
--- a/src/main/java/org/apache/commons/text/TextStringBuilder.java
+++ b/src/main/java/org/apache/commons/text/TextStringBuilder.java
@@ -652,6 +652,15 @@ public class TextStringBuilder implements CharSequence,
Appendable, Serializable
return append(str, 0, StringUtils.length(str));
}
+ private void validateRange(final int startIndex, final int length, final
int strLength) {
+ if (startIndex < 0 || startIndex > strLength) {
+ throw new StringIndexOutOfBoundsException("startIndex must be
valid");
+ }
+ if (length < 0 || startIndex + length > strLength) {
+ throw new StringIndexOutOfBoundsException("length must be valid");
+ }
+ }
+
/**
* Appends part of a string to this string builder. Appending null will
call {@link #appendNull()}.
*
@@ -667,12 +676,7 @@ public class TextStringBuilder implements CharSequence,
Appendable, Serializable
if (str == null) {
return appendNull();
}
- if (startIndex < 0 || startIndex > str.length()) {
- throw new StringIndexOutOfBoundsException("startIndex must be
valid");
- }
- if (length < 0 || startIndex + length > str.length()) {
- throw new StringIndexOutOfBoundsException("length must be valid");
- }
+ validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
@@ -716,12 +720,7 @@ public class TextStringBuilder implements CharSequence,
Appendable, Serializable
if (str == null) {
return appendNull();
}
- if (startIndex < 0 || startIndex > str.length()) {
- throw new StringIndexOutOfBoundsException("startIndex must be
valid");
- }
- if (length < 0 || startIndex + length > str.length()) {
- throw new StringIndexOutOfBoundsException("length must be valid");
- }
+ validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
@@ -753,12 +752,7 @@ public class TextStringBuilder implements CharSequence,
Appendable, Serializable
if (str == null) {
return appendNull();
}
- if (startIndex < 0 || startIndex > str.length()) {
- throw new StringIndexOutOfBoundsException("startIndex must be
valid");
- }
- if (length < 0 || startIndex + length > str.length()) {
- throw new StringIndexOutOfBoundsException("length must be valid");
- }
+ validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
@@ -790,12 +784,7 @@ public class TextStringBuilder implements CharSequence,
Appendable, Serializable
if (str == null) {
return appendNull();
}
- if (startIndex < 0 || startIndex > str.length()) {
- throw new StringIndexOutOfBoundsException("startIndex must be
valid");
- }
- if (length < 0 || startIndex + length > str.length()) {
- throw new StringIndexOutOfBoundsException("length must be valid");
- }
+ validateRange(startIndex, length, str.length());
if (length > 0) {
final int len = length();
ensureCapacityInternal(len + length);
diff --git a/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java
index 93f4627c..ab963579 100644
--- a/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/DamerauLevenshteinDistance.java
@@ -41,6 +41,32 @@ public class DamerauLevenshteinDistance implements
EditDistance<Integer> {
return distance > threshold ? -1 : distance;
}
+ private static <E> int calculateCost(final SimilarityInput<E> left, final
SimilarityInput<E> right,
+ final int leftIndex, final int
rightIndex,
+ final int[] curr, final int[] prev,
final int[] prevPrev) {
+ final int cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ?
0 : 1;
+
+ // Select cheapest operation
+ int value = Math.min(
+ Math.min(
+ prev[rightIndex] + 1, // Delete current character
+ curr[rightIndex - 1] + 1 // Insert current character
+ ),
+ prev[rightIndex - 1] + cost // Replace (or no cost if same
character)
+ );
+
+ // Check if adjacent characters are the same -> transpose if cheaper
+ if (leftIndex > 1
+ && rightIndex > 1
+ && left.at(leftIndex - 1) == right.at(rightIndex - 2)
+ && left.at(leftIndex - 2) == right.at(rightIndex - 1)) {
+ // Use cost here, to properly handle two subsequent equal letters
+ value = Math.min(value, prevPrev[rightIndex - 2] + cost);
+ }
+
+ return value;
+ }
+
/**
* Finds the Damerau-Levenshtein distance between two CharSequences if
it's less than or equal to a given threshold.
*
@@ -89,7 +115,7 @@ public class DamerauLevenshteinDistance implements
EditDistance<Integer> {
int[] prevPrev = new int[rightLength + 1];
int[] temp; // Temp variable use to shuffle arrays at the end of each
iteration
- int rightIndex, leftIndex, cost, minCost;
+ int rightIndex, leftIndex, minCost;
// Changing empty sequence to [0..i] requires i insertions
for (rightIndex = 0; rightIndex <= rightLength; rightIndex++) {
@@ -111,25 +137,7 @@ public class DamerauLevenshteinDistance implements
EditDistance<Integer> {
minCost = Integer.MAX_VALUE;
for (rightIndex = 1; rightIndex <= rightLength; rightIndex++) {
- cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 0
: 1;
-
- // Select cheapest operation
- curr[rightIndex] = Math.min(
- Math.min(
- prev[rightIndex] + 1, // Delete current
character
- curr[rightIndex - 1] + 1 // Insert current
character
- ),
- prev[rightIndex - 1] + cost // Replace (or no cost if
same character)
- );
-
- // Check if adjacent characters are the same -> transpose if
cheaper
- if (leftIndex > 1
- && rightIndex > 1
- && left.at(leftIndex - 1) == right.at(rightIndex - 2)
- && left.at(leftIndex - 2) == right.at(rightIndex - 1))
{
- // Use cost here, to properly handle two subsequent equal
letters
- curr[rightIndex] = Math.min(curr[rightIndex],
prevPrev[rightIndex - 2] + cost);
- }
+ curr[rightIndex] = calculateCost(left, right, leftIndex,
rightIndex, curr, prev, prevPrev);
minCost = Math.min(curr[rightIndex], minCost);
}
@@ -196,7 +204,7 @@ public class DamerauLevenshteinDistance implements
EditDistance<Integer> {
int[] prevPrev = new int[rightLength + 1];
int[] temp; // Temp variable use to shuffle arrays at the end of each
iteration
- int rightIndex, leftIndex, cost;
+ int rightIndex, leftIndex;
// Changing empty sequence to [0..i] requires i insertions
for (rightIndex = 0; rightIndex <= rightLength; rightIndex++) {
@@ -216,25 +224,7 @@ public class DamerauLevenshteinDistance implements
EditDistance<Integer> {
curr[0] = leftIndex;
for (rightIndex = 1; rightIndex <= rightLength; rightIndex++) {
- cost = left.at(leftIndex - 1) == right.at(rightIndex - 1) ? 0
: 1;
-
- // Select cheapest operation
- curr[rightIndex] = Math.min(
- Math.min(
- prev[rightIndex] + 1, // Delete current
character
- curr[rightIndex - 1] + 1 // Insert current
character
- ),
- prev[rightIndex - 1] + cost // Replace (or no cost if
same character)
- );
-
- // Check if adjacent characters are the same -> transpose if
cheaper
- if (leftIndex > 1
- && rightIndex > 1
- && left.at(leftIndex - 1) == right.at(rightIndex - 2)
- && left.at(leftIndex - 2) == right.at(rightIndex - 1))
{
- // Use cost here, to properly handle two subsequent equal
letters
- curr[rightIndex] = Math.min(curr[rightIndex],
prevPrev[rightIndex - 2] + cost);
- }
+ curr[rightIndex] = calculateCost(left, right, leftIndex,
rightIndex, curr, prev, prevPrev);
}
// Rotate arrays for next iteration