This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8d6474a1a72182b4cbc4fd8b5339f233de904899 Author: jiafeng.zhang <zhang...@gmail.com> AuthorDate: Mon Jul 31 12:57:10 2023 +0800 [fix](datax)doris writer url decoder fix #22401 When the data being imported contains certain special characters, the import fails. The following error message appears: 2023-07-28 15:15:28.960 INFO 21756 --- [-interval-flush] c.a.d.p.w.d.DorisWriterManager : Doris interval Sinking triggered: label[datax_doris_writer_7aa415e6-5a9c-4070-a699-70b4a627ae64]. 2023-07-28 15:15:29.015 INFO 21756 --- [ Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver : Start to join batch data: rows[95968] bytes[3815834] label[datax_doris_writer_7aa415e6-5a9c-4070-a699-70b4a627ae64]. 2023-07-28 15:15:29.038 INFO 21756 --- [ Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver : Executing stream load to: 'http://10.38.60.218:8030/api/ods_prod/ods_pexweb_online_product/_stream_load', size: '3911802' 2023-07-28 15:15:31.559 WARN 21756 --- [ Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver : Request failed with code:500 2023-07-28 15:15:31.561 INFO 21756 --- [ Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver : StreamLoad response :null 2023-07-28 15:15:31.564 WARN 21756 --- [ Thread-3] c.a.d.p.w.d.DorisWriterManager : Failed to flush batch data to Doris, retry times = 0 java.io.IOException: Unable to flush data to Doris: unknown result status.
at com.alibaba.datax.plugin.writer.doriswriter.DorisStreamLoadObserver.streamLoad(DorisStreamLoadObserver.java:66) ~[doriswriter-0.0.1-SNAPSHOT.jar:na] at com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager.asyncFlush(DorisWriterManager.java:163) [doriswriter-0.0.1-SNAPSHOT.jar:na] at com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager.access$000(DorisWriterManager.java:19) [doriswriter-0.0.1-SNAPSHOT.jar:na] at com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager$1.run(DorisWriterManager.java:134) [doriswriter-0.0.1-SNAPSHOT.jar:na] at java.lang.Thread.run(Thread.java:748) [na:1.8.0_221] The following error message was found in fe.log: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - For input string: " l" at java.net.URLDecoder.decode(URLDecoder.java:194) ~[?:1.8.0_221] at org.springframework.http.converter.FormHttpMessageConverter.read(FormHttpMessageConverter.java:352) ~[spring-web-5.3.22.jar:5.3.22] at org.springframework.web.filter.FormContentFilter.parseIfNecessary(FormContentFilter.java:109) ~[spring-web-5.3.22.jar:5.3.22] at org.springframework.web.filter.FormContentFilter.doFilterInternal(FormContentFilter.java:88) ~[spring-web-5.3.22.jar:5.3.22] at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:117) ~[spring-web-5.3.22.jar:5.3.22] at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622] at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622] at org.springframework.web.filter.CharacterEncodingFilter.doFilterInternal(CharacterEncodingFilter.java:201) ~[spring-web-5.3.22.jar:5.3.22] at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:117) ~[spring-web-5.3.22.jar:5.3.22] at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193)
~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622] at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622] at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622] at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) ~[jetty-server-9.4.48.v20220622.jar:9.4.48.v20220622] at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:600) ~[jetty-security-9.4.48.v20220622.jar:9.4.48.v20220622] at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[jetty-server-9.4.48.v20220622.jar:9.4.48.v20220622] at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandle --- .../plugin/writer/doriswriter/DorisStreamLoadObserver.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java b/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java index 3435b391d5..c3bcfadb19 100644 --- a/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java +++ b/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java @@ -64,6 +64,18 @@ public class DorisStreamLoadObserver { this.options = options; } + public String urlDecode(String outBuffer) { + String data = outBuffer; + try { + data = data.replaceAll("%(?![0-9a-fA-F]{2})", "%25"); + data = data.replaceAll("\\+", "%2B"); + data = URLDecoder.decode(data, "utf-8"); + } catch (Exception e) { + e.printStackTrace(); + } + return data; + } + public void streamLoad(WriterTuple data) throws Exception { String host = getLoadHost(); if(host == null){ @@ -77,6 +89,7 @@ public class DorisStreamLoadObserver { 
.append("/_stream_load") .toString(); LOG.info("Start to join batch data: rows[{}] bytes[{}] label[{}].", data.getRows().size(), data.getBytes(), data.getLabel()); + loadUrl = urlDecode(loadUrl); Map<String, Object> loadResult = put(loadUrl, data.getLabel(), addRows(data.getRows(), data.getBytes().intValue())); LOG.info("StreamLoad response :{}",JSON.toJSONString(loadResult)); final String keyStatus = "Status"; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org