This is an automated email from the ASF dual-hosted git repository. davsclaus pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/master by this push: new 3823a15 Bugfix for UnicodeHelper in Bindy component (#3702) 3823a15 is described below commit 3823a1564f72803267ba64b404989c47f735a3a4 Author: mgr-lhm <externer.dl.greul...@muenchen.de> AuthorDate: Wed Apr 1 16:32:21 2020 +0200 Bugfix for UnicodeHelper in Bindy component (#3702) * Bugfix for UnicodeHelper.indexOf(...). * Default for counting chars is now codepoints like in XMLSchema. Co-authored-by: Michael Greulich <michael.greul...@interface-ag.de> --- .../camel/dataformat/bindy/UnicodeHelper.java | 29 ++++++------- .../bindy/annotation/FixedLengthRecord.java | 2 +- .../camel/dataformat/bindy/UnicodeHelperTest.java | 50 ++++++++++------------ 3 files changed, 37 insertions(+), 44 deletions(-) diff --git a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java index f55e4e2..6a30753 100644 --- a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java +++ b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java @@ -117,25 +117,24 @@ public class UnicodeHelper implements Serializable { * @see String#indexOf(String) */ public int indexOf(final String str) { + return indexOf(str, 0); + } + + /** + * @see String#indexOf(String, int) + */ + public int indexOf(final String str, final int fromIndex) { split(); - final int tempIdx = input.indexOf(str); - if (tempIdx < 0) { - return tempIdx; - } - - for (int b = 0; b < splitted.size() - 1; b++) { - if (tempIdx == splitted.get(b)) { - for (int e = b + 1; e < splitted.size() - 1; e++) { - if (tempIdx + str.length() == splitted.get(e)) { - return b; - } - } + final int len = new UnicodeHelper(str, method).length(); + + for (int index = fromIndex; index + len < length(); index++) { + if (str.equals(input.substring(splitted.get(index), splitted.get(index + len)))) { + return index; } } - - final String cps = str.codePoints().mapToObj(cp -> String.format("0x%X", cp)).collect(Collectors.joining(",")); - throw new IllegalArgumentException("Given string (" + cps + ") is not a valid sequence of " + this.method + "s."); + + return -1; } private void split() { diff --git a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java index d8d93ae..ebfbe73 100644 --- a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java +++ b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java @@ -102,5 +102,5 @@ public @interface FixedLengthRecord { /** * Indicates how chars are counted */ - boolean countGrapheme() default true; + boolean countGrapheme() default false; } diff --git a/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java b/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java index 6765887..b2e7fdb 100644 --- a/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java +++ b/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java @@ -167,49 +167,43 @@ public class UnicodeHelperTest { public void testIndexOf() { final UnicodeHelper lh = new UnicodeHelper("a", Method.CODEPOINTS); Assert.assertEquals(-1, lh.indexOf("b")); - + final UnicodeHelper lh2 = new UnicodeHelper( - "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z", + "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z" + + "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z", Method.CODEPOINTS); - + Assert.assertEquals(1, lh2.indexOf(new String(Character.toChars(0x1f600)))); + Assert.assertEquals(14, lh2.indexOf(new String(Character.toChars(0x1f600)), 13)); Assert.assertEquals(3, lh2.indexOf(UCSTR)); - + Assert.assertEquals(16, lh2.indexOf(UCSTR, 13)); + Assert.assertEquals(10, lh2.indexOf("\u035f")); - - expectIllegalArgumentException(() -> { - lh2.indexOf(Character.toString(Character.toChars(0x1f600)[0])); // UTF-16 surrogates are no codepoints. - }); + Assert.assertEquals(23, lh2.indexOf("\u035f", 13)); } - + @Test public void testIndexOf2() { final UnicodeHelper lh = new UnicodeHelper("a", Method.GRAPHEME); Assert.assertEquals(-1, lh.indexOf("b")); - + final UnicodeHelper lh2 = new UnicodeHelper( - "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z", - Method.GRAPHEME); - + "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z" + + "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z", + Method.GRAPHEME + ); + Assert.assertEquals(1, lh2.indexOf(new String(Character.toChars(0x1f600)))); - + Assert.assertEquals(9, lh2.indexOf(new String(Character.toChars(0x1f600)), 8)); + Assert.assertEquals(3, lh2.indexOf(UCSTR)); + Assert.assertEquals(11, lh2.indexOf(UCSTR), 8); - expectIllegalArgumentException(() -> { - lh2.indexOf("\u035f"); // Codepoint of dangling combing char is not a "unicode char". - }); - } - - private void expectIllegalArgumentException(final Runnable r) { - try { - r.run(); - Assert.assertTrue("We do not expect to reach here -- missing IllegalArgumentException.", false); - - } catch (final IllegalArgumentException e) { - LOG.debug("Caught expected IllegalArgumentException", e); - - } + final UnicodeHelper lh3 = new UnicodeHelper("mm̂mm̂m", Method.GRAPHEME); + Assert.assertEquals(0, lh3.indexOf("m")); + Assert.assertEquals(2, lh3.indexOf("m", 1)); + Assert.assertEquals(3, lh3.indexOf("m̂", 2)); } private static String cps2String(final int... cps) {