CAMEL-6004: TokenizeXML added support for self-closing tags. Thanks to Aki Yoshida for the patch.
Conflicts: camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java Project: http://git-wip-us.apache.org/repos/asf/camel/repo Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/6a641ec6 Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/6a641ec6 Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/6a641ec6 Branch: refs/heads/camel-2.11.x Commit: 6a641ec65777b9e63ba905312880b08372ed0b2b Parents: 5467ef1 Author: Claus Ibsen <davscl...@apache.org> Authored: Sun Jun 23 10:19:25 2013 +0200 Committer: Claus Ibsen <davscl...@apache.org> Committed: Sun Jun 23 10:23:04 2013 +0200 ---------------------------------------------------------------------- .../apache/camel/builder/ExpressionBuilder.java | 8 +- .../support/TokenPairExpressionIterator.java | 2 +- .../support/TokenXMLExpressionIterator.java | 270 +++++++++++++++++++ .../support/TokenXMLPairExpressionIterator.java | 3 + .../tokenizer/TokenizeLanguageTest.java | 98 +++++++ 5 files changed, 375 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java ---------------------------------------------------------------------- diff --git a/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java b/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java index 0d135bd..b5d29ef 100644 --- a/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java +++ b/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java @@ -45,7 +45,7 @@ import org.apache.camel.model.language.MethodCallExpression; import org.apache.camel.spi.Language; import org.apache.camel.support.ExpressionAdapter; import org.apache.camel.support.TokenPairExpressionIterator; -import org.apache.camel.support.TokenXMLPairExpressionIterator; +import 
org.apache.camel.support.TokenXMLExpressionIterator; import org.apache.camel.util.ExchangeHelper; import org.apache.camel.util.FileUtil; import org.apache.camel.util.GroupIterator; @@ -1153,7 +1153,7 @@ public final class ExpressionBuilder { } /** - * Returns an {@link TokenXMLPairExpressionIterator} expression + * Returns an {@link TokenXMLExpressionIterator} expression */ public static Expression tokenizeXMLExpression(String tagName, String inheritNamespaceTagName) { ObjectHelper.notEmpty(tagName, "tagName"); @@ -1166,8 +1166,6 @@ public final class ExpressionBuilder { tagName = tagName + ">"; } - String endToken = "</" + tagName.substring(1); - if (inheritNamespaceTagName != null) { if (!inheritNamespaceTagName.startsWith("<")) { inheritNamespaceTagName = "<" + inheritNamespaceTagName; @@ -1177,7 +1175,7 @@ public final class ExpressionBuilder { } } - return new TokenXMLPairExpressionIterator(tagName, endToken, inheritNamespaceTagName); + return new TokenXMLExpressionIterator(tagName, inheritNamespaceTagName); } /** http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java ---------------------------------------------------------------------- diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java index 489f5e6..b6b9133 100644 --- a/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java +++ b/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java @@ -34,7 +34,7 @@ import org.apache.camel.util.ObjectHelper; * The message body must be able to convert to {@link InputStream} type which is used as stream * to access the message body. * <p/> - * For splitting XML files use {@link TokenXMLPairExpressionIterator} instead. 
+ * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead. */ public class TokenPairExpressionIterator extends ExpressionAdapter { http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java ---------------------------------------------------------------------- diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java new file mode 100644 index 0000000..b8d4374 --- /dev/null +++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java @@ -0,0 +1,270 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.camel.support; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.text.MessageFormat; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Scanner; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.camel.Exchange; +import org.apache.camel.InvalidPayloadException; +import org.apache.camel.util.IOHelper; +import org.apache.camel.util.ObjectHelper; + +/** + * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body + * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token, + * where the end token corresponds implicitly to either the end tag or the self-closing start tag. + * <p/> + * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream + * to access the message body. + * <p/> + * Can be used to split big XML files. + * <p/> + * This implementation supports inheriting namespaces from a parent/root tag. 
+ */ +public class TokenXMLExpressionIterator extends ExpressionAdapter { + private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")"); + private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)"; + private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^/]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!</{0}).)*</{0}\\s*>"; + private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>"; + + protected final String tagToken; + protected final String inheritNamespaceToken; + + public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) { + ObjectHelper.notEmpty(tagToken, "tagToken"); + this.tagToken = tagToken; + // namespace token is optional + this.inheritNamespaceToken = inheritNamespaceToken; + + // must be XML tokens + if (!tagToken.startsWith("<") || !tagToken.endsWith(">")) { + throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tagToken); + } + if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) { + throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken); + } + } + + protected Iterator<?> createIterator(InputStream in, String charset) { + XMLTokenIterator iterator = new XMLTokenIterator(tagToken, inheritNamespaceToken, in, charset); + iterator.init(); + return iterator; + } + + @Override + public boolean matches(Exchange exchange) { + // as a predicate we must close the stream, as we do not return an iterator that can be used + // afterwards to iterate the input stream + Object value = doEvaluate(exchange, true); + return ObjectHelper.evaluateValuePredicate(value); + } + + @Override + public Object evaluate(Exchange exchange) { + // as we return an iterator to access the input stream, we should not close it + return doEvaluate(exchange, false); + } + + /** + * Strategy to evaluate the exchange 
+ * + * @param exchange the exchange + * @param closeStream whether to close the stream before returning from this method. + * @return the evaluated value + */ + protected Object doEvaluate(Exchange exchange, boolean closeStream) { + InputStream in = null; + try { + in = exchange.getIn().getMandatoryBody(InputStream.class); + // we may read from a file, and want to support custom charset defined on the exchange + String charset = IOHelper.getCharsetName(exchange); + return createIterator(in, charset); + } catch (InvalidPayloadException e) { + exchange.setException(e); + // must close input stream + IOHelper.close(in); + return null; + } finally { + if (closeStream) { + IOHelper.close(in); + } + } + } + + /** + * Iterator to walk the input stream + */ + static class XMLTokenIterator implements Iterator<Object>, Closeable { + final String tagToken; + final InputStream in; + final String charset; + Scanner scanner; + Object image; + + private final Pattern tagTokenPattern; + private final String inheritNamespaceToken; + private Pattern inheritNamespaceTokenPattern; + private String rootTokenNamespaces; + + XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) { + this.tagToken = tagToken; + this.in = in; + this.charset = charset; + + // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns + this.tagTokenPattern = + Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, + SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), + Pattern.MULTILINE | Pattern.DOTALL); + + this.inheritNamespaceToken = inheritNamespaceToken; + if (inheritNamespaceToken != null) { + // the inherit namespace token may itself have a namespace prefix + // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines + this.inheritNamespaceTokenPattern = + 
Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE, + SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), + Pattern.MULTILINE | Pattern.DOTALL); + } + } + + void init() { + // use a scanner with the default delimiter + this.scanner = new Scanner(in, charset); + this.image = scanner.hasNext() ? (String) next(true) : null; + } + + String getNext(boolean first) { + // initialize inherited namespaces on first + if (first && inheritNamespaceToken != null) { + rootTokenNamespaces = getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0)); + } + + String next = scanner.findWithinHorizon(tagTokenPattern, 0); + if (next == null) { + return null; + } + + // build answer accordingly to whether namespaces should be inherited or not + // REVISIT should skip the prefixes that are declared within the child itself. + if (inheritNamespaceToken != null && rootTokenNamespaces != null) { + String head = ObjectHelper.before(next, ">"); + boolean empty = false; + if (head.endsWith("/")) { + head = head.substring(0, head.length() - 1); + empty = true; + } + StringBuilder sb = new StringBuilder(); + // append root namespaces to local start token + // grab the text + String tail = ObjectHelper.after(next, ">"); + // build result with inherited namespaces + next = sb.append(head).append(rootTokenNamespaces).append(empty ? 
"/>" : ">").append(tail).toString(); + } + + return next; + } + + private String getNamespacesFromNamespaceToken(String text) { + if (text == null) { + return null; + } + + // find namespaces (there can be attributes mixed, so we should only grab the namespaces) + Map<String, String> namespaces = new LinkedHashMap<String, String>(); + Matcher matcher = NAMESPACE_PATTERN.matcher(text); + while (matcher.find()) { + String prefix = matcher.group(1); + String url = matcher.group(2); + if (ObjectHelper.isEmpty(prefix)) { + prefix = "_DEFAULT_"; + } else { + // skip leading : + prefix = prefix.substring(1); + } + namespaces.put(prefix, url); + } + + // did we find any namespaces + if (namespaces.isEmpty()) { + return null; + } + + // build namespace String + StringBuilder sb = new StringBuilder(); + for (Map.Entry<String, String> entry : namespaces.entrySet()) { + String key = entry.getKey(); + // note the value is already quoted + String value = entry.getValue(); + if ("_DEFAULT_".equals(key)) { + sb.append(" xmlns=").append(value); + } else { + sb.append(" xmlns:").append(key).append("=").append(value); + } + } + + return sb.toString(); + } + + @Override + public boolean hasNext() { + return image != null; + } + + @Override + public Object next() { + return next(false); + } + + Object next(boolean first) { + Object answer = image; + // calculate next + if (scanner.hasNext()) { + image = getNext(first); + } else { + image = null; + } + + if (answer == null) { + // first time the image may be null + answer = image; + } + return answer; + } + + @Override + public void remove() { + // noop + } + + @Override + public void close() throws IOException { + scanner.close(); + } + + } + +} http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java ---------------------------------------------------------------------- diff --git 
a/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java index 9ae1477..c5fbea4 100644 --- a/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java +++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java @@ -36,7 +36,10 @@ import org.apache.camel.util.ObjectHelper; * Can be used to split big XML files. * <p/> * This implementation supports inheriting namespaces from a parent/root tag. + * + * @deprecated use {@link TokenXMLExpressionIterator} instead. */ +@Deprecated public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator { private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\""); http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java ---------------------------------------------------------------------- diff --git a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java new file mode 100644 index 0000000..bfc3bfa --- /dev/null +++ b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.language.tokenizer; + +import org.apache.camel.ContextTestSupport; +import org.apache.camel.builder.RouteBuilder; + +public class TokenizeLanguageTest extends ContextTestSupport { + + public void testSendClosedTagMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a'></child>", "<child some_attr='b' anotherAttr='b'></child>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'></child><child some_attr='b' anotherAttr='b'></child></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendClosedTagWithLineBreaksMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a'>\n</child>", "<child some_attr='b' anotherAttr='b'>\n</child>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?>\n" + + "<parent>\n" + + "<child some_attr='a' anotherAttr='a'>\n" + + "</child>\n" + + "<child some_attr='b' anotherAttr='b'>\n" + + "</child>\n" + + "</parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendSelfClosingTagMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a' />", "<child some_attr='b' anotherAttr='b' />"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a' /><child some_attr='b' 
anotherAttr='b' /></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendMixedClosingTagMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<child some_attr='a' anotherAttr='a'>ha</child>", "<child some_attr='b' anotherAttr='b' />", "<child some_attr='c'></child>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'>ha</child><child some_attr='b' anotherAttr='b' /><child some_attr='c'></child></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendNamespacedChildMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child>", "<c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' />"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child><c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' /></parent>"); + + assertMockEndpointsSatisfied(); + } + + public void testSendNamespacedParentMessageToTokenize() throws Exception { + getMockEndpoint("mock:result").expectedBodiesReceived( + "<c:child some_attr='a' anotherAttr='a' xmlns:c='urn:c' xmlns:d=\"urn:d\"></c:child>", "<c:child some_attr='b' anotherAttr='b' xmlns:c='urn:c' xmlns:d=\"urn:d\"/>"); + + template.sendBody("direct:start", + "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent>"); + + assertMockEndpointsSatisfied(); + } + + @Override + protected RouteBuilder createRouteBuilder() { + return new RouteBuilder() { + public void configure() { + from("direct:start") + .split().tokenizeXML("child", "parent") + .to("mock:result") + .end(); + } + }; + } +}