This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8d6474a1a72182b4cbc4fd8b5339f233de904899
Author: jiafeng.zhang <zhang...@gmail.com>
AuthorDate: Mon Jul 31 12:57:10 2023 +0800

    [fix](datax)doris writer url decoder fix #22401
    
    When the data being imported contains certain special characters, the 
import fails.
    The following error message appears:
    
    2023-07-28 15:15:28.960  INFO 21756 --- [-interval-flush] 
c.a.d.p.w.d.DorisWriterManager           : Doris interval Sinking triggered: 
label[datax_doris_writer_7aa415e6-5a9c-4070-a699-70b4a627ae64].
    2023-07-28 15:15:29.015  INFO 21756 --- [       Thread-3] 
c.a.d.p.w.d.DorisStreamLoadObserver      : Start to join batch data: 
rows[95968] bytes[3815834] 
label[datax_doris_writer_7aa415e6-5a9c-4070-a699-70b4a627ae64].
    2023-07-28 15:15:29.038  INFO 21756 --- [       Thread-3] 
c.a.d.p.w.d.DorisStreamLoadObserver      : Executing stream load to: 
'http://10.38.60.218:8030/api/ods_prod/ods_pexweb_online_product/_stream_load', 
size: '3911802'
    2023-07-28 15:15:31.559  WARN 21756 --- [       Thread-3] 
c.a.d.p.w.d.DorisStreamLoadObserver      : Request failed with code:500
    2023-07-28 15:15:31.561  INFO 21756 --- [       Thread-3] 
c.a.d.p.w.d.DorisStreamLoadObserver      : StreamLoad response :null
    2023-07-28 15:15:31.564  WARN 21756 --- [       Thread-3] 
c.a.d.p.w.d.DorisWriterManager           : Failed to flush batch data to Doris, 
retry times = 0
    
    java.io.IOException: Unable to flush data to Doris: unknown result status.
            at 
com.alibaba.datax.plugin.writer.doriswriter.DorisStreamLoadObserver.streamLoad(DorisStreamLoadObserver.java:66)
 ~[doriswriter-0.0.1-SNAPSHOT.jar:na]
            at 
com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager.asyncFlush(DorisWriterManager.java:163)
 [doriswriter-0.0.1-SNAPSHOT.jar:na]
            at 
com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager.access$000(DorisWriterManager.java:19)
 [doriswriter-0.0.1-SNAPSHOT.jar:na]
            at 
com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager$1.run(DorisWriterManager.java:134)
 [doriswriter-0.0.1-SNAPSHOT.jar:na]
            at java.lang.Thread.run(Thread.java:748) [na:1.8.0_221]
    
    The following error message was found in fe.log:
    
    java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in 
escape (%) pattern - For input string: " l"
            at java.net.URLDecoder.decode(URLDecoder.java:194) ~[?:1.8.0_221]
            at 
org.springframework.http.converter.FormHttpMessageConverter.read(FormHttpMessageConverter.java:352)
 ~[spring-web-5.3.22.jar:5.3.22]
            at 
org.springframework.web.filter.FormContentFilter.parseIfNecessary(FormContentFilter.java:109)
 ~[spring-web-5.3.22.jar:5.3.22]
            at 
org.springframework.web.filter.FormContentFilter.doFilterInternal(FormContentFilter.java:88)
 ~[spring-web-5.3.22.jar:5.3.22]
            at 
org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:117)
 ~[spring-web-5.3.22.jar:5.3.22]
            at 
org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) 
~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626)
 ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.springframework.web.filter.CharacterEncodingFilter.doFilterInternal(CharacterEncodingFilter.java:201)
 ~[spring-web-5.3.22.jar:5.3.22]
            at 
org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:117)
 ~[spring-web-5.3.22.jar:5.3.22]
            at 
org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) 
~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626)
 ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552) 
~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) 
~[jetty-server-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:600) 
~[jetty-security-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) 
~[jetty-server-9.4.48.v20220622.jar:9.4.48.v20220622]
            at 
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandle
---
 .../plugin/writer/doriswriter/DorisStreamLoadObserver.java  | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git 
a/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java
 
b/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java
index 3435b391d5..c3bcfadb19 100644
--- 
a/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java
+++ 
b/extension/DataX/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java
@@ -64,6 +64,18 @@ public class DorisStreamLoadObserver {
         this.options = options;
     }
 
+    public String urlDecode(String outBuffer) {
+        String data = outBuffer;
+        try {
+            data = data.replaceAll("%(?![0-9a-fA-F]{2})", "%25");
+            data = data.replaceAll("\\+", "%2B");
+            data = URLDecoder.decode(data, "utf-8");
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return data;
+    }
+
     public void streamLoad(WriterTuple data) throws Exception {
         String host = getLoadHost();
         if(host == null){
@@ -77,6 +89,7 @@ public class DorisStreamLoadObserver {
                 .append("/_stream_load")
                 .toString();
         LOG.info("Start to join batch data: rows[{}] bytes[{}] label[{}].", 
data.getRows().size(), data.getBytes(), data.getLabel());
+        loadUrl = urlDecode(loadUrl);
         Map<String, Object> loadResult = put(loadUrl, data.getLabel(), 
addRows(data.getRows(), data.getBytes().intValue()));
         LOG.info("StreamLoad response :{}",JSON.toJSONString(loadResult));
         final String keyStatus = "Status";


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to