This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository 
https://gitbox.apache.org/repos/asf/incubator-doris-spark-connector.git

commit ad9eff5f7ce932c71c6de78ee764ab10047b6f0e
Author: HuangWei <huang...@apache.org>
AuthorDate: Sun Jan 10 20:48:46 2021 +0800

    [Spark on Doris] fix the encode of varchar when convertArrowToRowBatch 
(#5202)
    
    `convertArrowToRowBatch` use the default charset to encode String.
    Set it to UTF_8, because we use `arrow::utf8` on the Backends.
---
 .../apache/doris/spark/serialization/RowBatch.java | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/main/java/org/apache/doris/spark/serialization/RowBatch.java 
b/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
index 0781f1e..ad3cfe5 100644
--- a/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
+++ b/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
@@ -20,6 +20,7 @@ package org.apache.doris.spark.serialization;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
@@ -52,10 +53,10 @@ import com.google.common.base.Preconditions;
  * row batch data container.
  */
 public class RowBatch {
-    private static Logger logger = LoggerFactory.getLogger(RowBatch.class);
+    private static final Logger logger = 
LoggerFactory.getLogger(RowBatch.class);
 
     public static class Row {
-        private List<Object> cols;
+        private final List<Object> cols;
 
         Row(int colCount) {
             this.cols = new ArrayList<>(colCount);
@@ -74,11 +75,10 @@ public class RowBatch {
     private int offsetInRowBatch = 0;
     private int rowCountInOneBatch = 0;
     private int readRowCount = 0;
-    private List<Row> rowBatch = new ArrayList<>();
+    private final List<Row> rowBatch = new ArrayList<>();
     private final ArrowStreamReader arrowStreamReader;
-    private final VectorSchemaRoot root;
     private List<FieldVector> fieldVectors;
-    private RootAllocator rootAllocator;
+    private final RootAllocator rootAllocator;
     private final Schema schema;
 
     public RowBatch(TScanBatchResult nextResult, Schema schema) throws 
DorisException {
@@ -88,9 +88,8 @@ public class RowBatch {
                 new ByteArrayInputStream(nextResult.getRows()),
                 rootAllocator
                 );
-        this.offsetInRowBatch = 0;
         try {
-            this.root = arrowStreamReader.getVectorSchemaRoot();
+            VectorSchemaRoot root = arrowStreamReader.getVectorSchemaRoot();
             while (arrowStreamReader.loadNextBatch()) {
                 fieldVectors = root.getFieldVectors();
                 if (fieldVectors.size() != schema.size()) {
@@ -119,10 +118,7 @@ public class RowBatch {
     }
 
     public boolean hasNext() {
-        if (offsetInRowBatch < readRowCount) {
-            return true;
-        }
-        return false;
+        return offsetInRowBatch < readRowCount;
     }
 
     private void addValueToRow(int rowIndex, Object obj) {
@@ -268,7 +264,7 @@ public class RowBatch {
                                 addValueToRow(rowIndex, null);
                                 continue;
                             }
-                            String value = new 
String(varCharVector.get(rowIndex));
+                            String value = new 
String(varCharVector.get(rowIndex), StandardCharsets.UTF_8);
                             addValueToRow(rowIndex, value);
                         }
                         break;
@@ -284,7 +280,7 @@ public class RowBatch {
         }
     }
 
-    public List<Object> next() throws DorisException {
+    public List<Object> next() {
         if (!hasNext()) {
             String errMsg = "Get row offset:" + offsetInRowBatch + " larger 
than row size: " + readRowCount;
             logger.error(errMsg);

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to