Repository: camel Updated Branches: refs/heads/camel-2.13.x b4479e310 -> 5dd561ddb
CAMEL-7388: xmlTokenizer to optionally wrap the token with the enclosing elements Project: http://git-wip-us.apache.org/repos/asf/camel/repo Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/f57ded2b Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/f57ded2b Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/f57ded2b Branch: refs/heads/camel-2.13.x Commit: f57ded2bbb8ee8d74f0257be0d3d2dcb8116cf2f Parents: b4479e3 Author: Akitoshi Yoshida <a...@apache.org> Authored: Wed Apr 23 10:16:13 2014 +0200 Committer: Akitoshi Yoshida <a...@apache.org> Committed: Thu Apr 24 10:59:11 2014 +0200 ---------------------------------------------------------------------- .../support/TokenXMLExpressionIterator.java | 122 ++++++++++++++++-- .../tokenizer/TokenizeLanguageTest.java | 10 ++ .../tokenizer/TokenizeWrapLanguageTest.java | 128 +++++++++++++++++++ 3 files changed, 249 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/camel/blob/f57ded2b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java ---------------------------------------------------------------------- diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java index ba21a71..f6ac6be 100644 --- a/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java +++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java @@ -16,14 +16,20 @@ */ package org.apache.camel.support; +import java.io.ByteArrayOutputStream; import java.io.Closeable; +import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.text.MessageFormat; +import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.Scanner; +import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -49,7 +55,8 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter { private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)"; private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>"; private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>"; - + private static final String OPTION_WRAP_TOKEN = "<*>"; + protected final String tagToken; protected final String inheritNamespaceToken; @@ -126,12 +133,14 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter { private final Pattern tagTokenPattern; private final String inheritNamespaceToken; + private final boolean wrapToken; private Pattern inheritNamespaceTokenPattern; private String rootTokenNamespaces; + private String wrapHead; + private String wrapTail; XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) { this.tagToken = tagToken; - this.in = in; this.charset = charset; // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns @@ -141,13 +150,20 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter { Pattern.MULTILINE | Pattern.DOTALL); this.inheritNamespaceToken = inheritNamespaceToken; - if (inheritNamespaceToken != null) { - // the inherit namespace token may itself have a namespace prefix - // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines - this.inheritNamespaceTokenPattern = - Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE, - SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), - Pattern.MULTILINE | Pattern.DOTALL); + if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) { + this.wrapToken = true; + this.in = new RecordableInputStream(in, charset); + } else { + this.wrapToken = false; + this.in = in; + if (inheritNamespaceToken != null) { + // the inherit namespace token may itself have a namespace prefix + // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines + this.inheritNamespaceTokenPattern = + Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE, + SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), + Pattern.MULTILINE | Pattern.DOTALL); + } } } @@ -159,7 +175,7 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter { String getNext(boolean first) { // initialize inherited namespaces on first - if (first && inheritNamespaceToken != null) { + if (first && inheritNamespaceToken != null && !wrapToken) { rootTokenNamespaces = getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0)); } @@ -167,10 +183,15 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter { if (next == null) { return null; } + if (first && wrapToken) { + MatchResult mres = scanner.match(); + wrapHead = ((RecordableInputStream)in).getText(mres.start()); + wrapTail = buildXMLTail(wrapHead); + } // build answer accordingly to whether namespaces should be inherited or not - // REVISIT should skip the prefixes that are declared within the child itself. if (inheritNamespaceToken != null && rootTokenNamespaces != null) { + // REVISIT should skip the prefixes that are declared within the child itself. String head = ObjectHelper.before(next, ">"); boolean empty = false; if (head.endsWith("/")) { @@ -183,6 +204,10 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter { String tail = ObjectHelper.after(next, ">"); // build result with inherited namespaces next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString(); + } else if (wrapToken) { + // wrap the token + StringBuilder sb = new StringBuilder(); + next = sb.append(wrapHead).append(next).append(wrapTail).toString(); } return next; @@ -267,4 +292,79 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter { } + private static String buildXMLTail(String xmlhead) { + // assume the input text is a portion of a well-formed xml + List<String> tags = new ArrayList<String>(); + int p = 0; + while (p < xmlhead.length()) { + p = xmlhead.indexOf('<', p); + if (p < 0) { + break; + } + int nc = xmlhead.charAt(p + 1); + if (nc == '?') { + p++; + continue; + } else if (nc == '/') { + p++; + tags.remove(tags.size() - 1); + } else { + final int ep = xmlhead.indexOf('>', p); + if (xmlhead.charAt(ep - 1) == '/') { + p++; + continue; + } + final int sp = xmlhead.substring(p, ep).indexOf(' '); + tags.add(xmlhead.substring(p + 1, sp > 0 ? p + sp : ep)); + p = ep; + } + } + StringBuilder sb = new StringBuilder(); + for (int i = tags.size() - 1; i >= 0; i--) { + sb.append("</").append(tags.get(i)).append(">"); + } + return sb.toString(); + } + + // this input stream records the stream until the first text extraction occurs. + private static class RecordableInputStream extends FilterInputStream { + private ByteArrayOutputStream buf; + private String charset; + private boolean recording; + protected RecordableInputStream(InputStream in, String charset) { + super(in); + this.buf = new ByteArrayOutputStream(); + this.charset = charset; + this.recording = true; + } + + @Override + public int read() throws IOException { + int c = super.read(); + if (c > 0 && recording) { + buf.write(c); + } + return c; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int n = super.read(b, off, len); + if (n > 0 && recording) { + buf.write(b, off, n); + } + return n; + } + + public String getText(int pos) { + String t = null; + recording = false; + try { + t = new String(buf.toByteArray(), 0, pos, charset); + } catch (UnsupportedEncodingException e) { + // ignore it as this should have be caught while scanning. + } + return t; + } + } } http://git-wip-us.apache.org/repos/asf/camel/blob/f57ded2b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java ---------------------------------------------------------------------- diff --git a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java index 939fd10..1238c64 100644 --- a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java +++ b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java @@ -96,6 +96,16 @@ public class TokenizeLanguageTest extends ContextTestSupport { assertMockEndpointsSatisfied(); } + public void testSendMoreParentsMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<c:child some_attr='a' anotherAttr='a' xmlns:c='urn:c' xmlns:d=\"urn:d\"></c:child>", "<c:child some_attr='b' anotherAttr='b' xmlns:c='urn:c' xmlns:d=\"urn:d\"/>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent></greatparent></g:greatgreatparent>"); + + assertMockEndpointsSatisfied(); + } + @Override protected RouteBuilder createRouteBuilder() { return new RouteBuilder() { http://git-wip-us.apache.org/repos/asf/camel/blob/f57ded2b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java ---------------------------------------------------------------------- diff --git a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java new file mode 100644 index 0000000..61ed6ea --- /dev/null +++ b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.language.tokenizer; + +import org.apache.camel.ContextTestSupport; +import org.apache.camel.builder.RouteBuilder; + +public class TokenizeWrapLanguageTest extends ContextTestSupport { + + public void testSendClosedTagMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived("<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'></child></parent>", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='b' anotherAttr='b'></child></parent>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'></child><child some_attr='b' anotherAttr='b'></child></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendClosedTagWithLineBreaksMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived("<?xml version='1.0' encoding='UTF-8'?>\n<parent>\n<child some_attr='a' anotherAttr='a'>\n</child></parent>", + "<?xml version='1.0' encoding='UTF-8'?>\n<parent>\n<child some_attr='b' anotherAttr='b'>\n</child></parent>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?>\n" + + "<parent>\n" + + "<child some_attr='a' anotherAttr='a'>\n" + + "</child>\n" + + "<child some_attr='b' anotherAttr='b'>\n" + + "</child>\n" + + "</parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendSelfClosingTagMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived("<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a' /></parent>", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='b' anotherAttr='b' /></parent>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a' /><child some_attr='b' anotherAttr='b' /></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendMixedClosingTagMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'>ha</child></parent>", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='b' anotherAttr='b' /></parent>", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='c'></child></parent>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'>ha</child><child some_attr='b' anotherAttr='b' /><child some_attr='c'></child></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendMixedClosingTagInsideMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<parent><child name='child1'><grandchild name='grandchild1'/> <grandchild name='grandchild2'/></child></parent>", + "<parent><child name='child2'><grandchild name='grandchild1'></grandchild><grandchild name='grandchild2'></grandchild></child></parent>"); + + template.sendBody("direct:start", + "<parent><child name='child1'><grandchild name='grandchild1'/> <grandchild name='grandchild2'/></child>" + + "<child name='child2'><grandchild name='grandchild1'></grandchild><grandchild name='grandchild2'></grandchild></child></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendNamespacedChildMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child></parent>", + "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' /></parent>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child><c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' /></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendNamespacedParentMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child></c:parent>", + "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='b' anotherAttr='b'/></c:parent>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendMoreParentsMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child></c:parent></greatparent></g:greatgreatparent>", + "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='b' anotherAttr='b'/></c:parent></greatparent></g:greatgreatparent>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent></greatparent></g:greatgreatparent>"); + + assertMockEndpointsSatisfied(); + } + + @Override + protected RouteBuilder createRouteBuilder() { + return new RouteBuilder() { + public void configure() { + from("direct:start") + .split().tokenizeXML("child", "*") + .to("mock:result") + .end(); + } + }; + } +}