This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1-lakehouse
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f04ba345104d91183469b18fdccd53f9e509b5d2
Author: morningman <morning...@163.com>
AuthorDate: Mon Feb 17 15:31:55 2025 +0800

    Revert "[enchement](utf8)import enable_text_validate_utf8 session var 
(#45537) (#46070)"
    
    This reverts commit a380f5d2228517492b863e6998c05093842b599f.
---
 be/src/util/utf8_check.cpp                         |   7 --
 be/src/util/utf8_check.h                           |   4 -
 be/src/vec/exec/format/csv/csv_reader.cpp          |   6 +-
 .../scripts/create_preinstalled_scripts/run72.hql  |  31 -------
 .../text/utf8_check/utf8_check_fail.csv            |   5 --
 .../doris/datasource/hive/source/HiveScanNode.java |   2 -
 .../java/org/apache/doris/qe/SessionVariable.java  |   9 --
 .../ExternalFileTableValuedFunction.java           |   2 -
 gensrc/thrift/PlanNodes.thrift                     |   2 -
 .../external_table_p0/hive/test_utf8_check.out     | Bin 1193 -> 0 bytes
 .../external_table_p0/hive/test_utf8_check.groovy  | 100 ---------------------
 11 files changed, 3 insertions(+), 165 deletions(-)

diff --git a/be/src/util/utf8_check.cpp b/be/src/util/utf8_check.cpp
index f90c27e5e91..5355b901420 100644
--- a/be/src/util/utf8_check.cpp
+++ b/be/src/util/utf8_check.cpp
@@ -327,11 +327,4 @@ bool validate_utf8(const char* src, size_t len) {
     return validate_utf8_naive(src, len);
 }
 #endif
-
-bool validate_utf8(const TFileScanRangeParams& params, const char* src, size_t len) {
-    if (params.__isset.file_attributes && !params.file_attributes.enable_text_validate_utf8) {
-        return true;
-    }
-    return validate_utf8(src, len);
-}
 } // namespace doris
diff --git a/be/src/util/utf8_check.h b/be/src/util/utf8_check.h
index 7e9b7a2a9de..4214e186b71 100644
--- a/be/src/util/utf8_check.h
+++ b/be/src/util/utf8_check.h
@@ -17,8 +17,6 @@
 
 #pragma once
 
-#include <gen_cpp/PlanNodes_types.h>
-
 #include <cstddef>
 
 namespace doris {
@@ -27,6 +25,4 @@ namespace doris {
 bool validate_utf8(const char* src, size_t len);
 // check utf8 use naive c++
 bool validate_utf8_naive(const char* data, size_t len);
-
-bool validate_utf8(const TFileScanRangeParams& params, const char* src, size_t len);
 } // namespace doris
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index 77a5b65d512..1fc3bbad294 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -715,7 +715,7 @@ Status CsvReader::_fill_empty_line(Block* block, std::vector<MutableColumnPtr>&
 }
 
 Status CsvReader::_validate_line(const Slice& line, bool* success) {
-    if (!_is_proto_format && !validate_utf8(_params, line.data, line.size)) {
+    if (!_is_proto_format && !validate_utf8(line.data, line.size)) {
         if (!_is_load) {
             return Status::InternalError<false>("Only support csv data in utf8 
codec");
         } else {
@@ -951,7 +951,7 @@ Status CsvReader::_parse_col_nums(size_t* col_nums) {
         return Status::InternalError<false>(
                 "The first line is empty, can not parse column numbers");
     }
-    if (!validate_utf8(_params, const_cast<char*>(reinterpret_cast<const char*>(ptr)), size)) {
+    if (!validate_utf8(const_cast<char*>(reinterpret_cast<const char*>(ptr)), size)) {
         return Status::InternalError<false>("Only support csv data in utf8 
codec");
     }
     ptr = _remove_bom(ptr, size);
@@ -968,7 +968,7 @@ Status CsvReader::_parse_col_names(std::vector<std::string>* col_names) {
     if (size == 0) {
         return Status::InternalError<false>("The first line is empty, can not 
parse column names");
     }
-    if (!validate_utf8(_params, const_cast<char*>(reinterpret_cast<const char*>(ptr)), size)) {
+    if (!validate_utf8(const_cast<char*>(reinterpret_cast<const char*>(ptr)), size)) {
         return Status::InternalError<false>("Only support csv data in utf8 
codec");
     }
     ptr = _remove_bom(ptr, size);
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run72.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run72.hql
deleted file mode 100644
index 1ab754b5042..00000000000
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run72.hql
+++ /dev/null
@@ -1,31 +0,0 @@
-CREATE TABLE invalid_utf8_data (
-    id INT,
-    corrupted_data STRING,
-    string_data1 STRING,
-    string_data2 STRING
-)
-ROW FORMAT DELIMITED
-FIELDS TERMINATED BY ','
-LINES TERMINATED BY '\n'
-location '/user/doris/preinstalled_data/text/utf8_check';
-
-
-CREATE TABLE invalid_utf8_data2 (
-    id INT,
-    corrupted_data STRING,
-    string_data1 STRING,
-    string_data2 STRING
-)
-ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
-WITH SERDEPROPERTIES (
-    "separatorChar" = ",",
-    "quoteChar" = "\"",
-    "escapeChar" = "\\"
-)
-location '/user/doris/preinstalled_data/text/utf8_check';
-
-
-
-msck repair table invalid_utf8_data;
-msck repair table invalid_utf8_data2;
-
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/utf8_check/utf8_check_fail.csv b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/utf8_check/utf8_check_fail.csv
deleted file mode 100644
index 391cd493660..00000000000
--- a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/text/utf8_check/utf8_check_fail.csv
+++ /dev/null
@@ -1,5 +0,0 @@
-1,�,AAB,helloworld
-2,��,AAB,helloworld
-2,���,AAB,helloworld
-4,����,AAB,helloworld
-5,�����,AAB,helloworld
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 8f965ddf022..7b0b3fd1c19 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -452,8 +452,6 @@ public class HiveScanNode extends FileQueryScanNode {
         TFileAttributes fileAttributes = new TFileAttributes();
         fileAttributes.setTextParams(textParams);
         fileAttributes.setHeaderType("");
-        fileAttributes.setEnableTextValidateUtf8(
-                ConnectContext.get().getSessionVariable().enableTextValidateUtf8);
         if (textParams.isSet(TFileTextScanRangeParams._Fields.ENCLOSE)) {
             fileAttributes.setTrimDoubleQuotes(true);
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index f68fd1423c3..5e0c3b345e3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -667,8 +667,6 @@ public class SessionVariable implements Serializable, Writable {
      */
     public static final String ENABLE_AUTO_CREATE_WHEN_OVERWRITE = "enable_auto_create_when_overwrite";
 
-    public static final String ENABLE_TEXT_VALIDATE_UTF8 = "enable_text_validate_utf8";
-
     /**
      * If set false, user couldn't submit analyze SQL and FE won't allocate any related resources.
      */
@@ -2230,13 +2228,6 @@ public class SessionVariable implements Serializable, Writable {
     })
     public boolean enableAutoCreateWhenOverwrite = false;
 
-    @VariableMgr.VarAttr(name = ENABLE_TEXT_VALIDATE_UTF8, needForward = true, description = {
-            "对于 text 类型的文件读取,是否开启utf8编码检查。非utf8字符会显示成乱码。",
-            "For text type file reading, whether to enable utf8 encoding check."
-                    + "non-utf8 characters will be displayed as garbled characters."
-    })
-    public boolean enableTextValidateUtf8 = true;
-
     @VariableMgr.VarAttr(name = SKIP_CHECKING_ACID_VERSION_FILE, needForward = true, description = {
             "跳过检查 transactional hive 版本文件 '_orc_acid_version.'",
             "Skip checking transactional hive version file 
'_orc_acid_version.'"
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index cb1a2d89c5d..1f65921832b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -304,8 +304,6 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
             fileAttributes.setHeaderType(this.headerType);
             fileAttributes.setTrimDoubleQuotes(trimDoubleQuotes);
             fileAttributes.setSkipLines(skipLines);
-            fileAttributes.setEnableTextValidateUtf8(
-                    ConnectContext.get().getSessionVariable().enableTextValidateUtf8);
         } else if (this.fileFormatType == TFileFormatType.FORMAT_JSON) {
             fileAttributes.setJsonRoot(jsonRoot);
             fileAttributes.setJsonpaths(jsonPaths);
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 7ccb12b3331..1b873787765 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -284,8 +284,6 @@ struct TFileAttributes {
     10: optional bool trim_double_quotes;
     // csv skip line num, only used when csv header_type is not set.
     11: optional i32 skip_lines;
-    //For text type file reading, whether to enable utf8 encoding check.(Catalog && TVF)
-    12: optional bool enable_text_validate_utf8 = true;
 }
 
 struct TIcebergDeleteFileDesc {
diff --git a/regression-test/data/external_table_p0/hive/test_utf8_check.out b/regression-test/data/external_table_p0/hive/test_utf8_check.out
deleted file mode 100644
index 7557e789d49..00000000000
Binary files a/regression-test/data/external_table_p0/hive/test_utf8_check.out and /dev/null differ
diff --git a/regression-test/suites/external_table_p0/hive/test_utf8_check.groovy b/regression-test/suites/external_table_p0/hive/test_utf8_check.groovy
deleted file mode 100644
index aa26fdede73..00000000000
--- a/regression-test/suites/external_table_p0/hive/test_utf8_check.groovy
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-
-suite("test_utf8_check","p0,external,tvf,hive,external_docker,external_docker_hive")
 {
-    String enabled = context.config.otherConfigs.get("enableHiveTest")
-    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
-        logger.info("diable Hive test.")
-        return;
-    }
-
-    for (String hivePrefix : ["hive2","hive3"]) {
-    
-        String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
-        String catalog_name = "${hivePrefix}_test_utf8_check"
-        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-        def hdfsUserName = "doris"
-        String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort")
-        def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
-
-        sql """drop catalog if exists ${catalog_name}"""
-        sql """create catalog if not exists ${catalog_name} properties (
-            "type"="hms",
-            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
-        );"""
-        sql """use `${catalog_name}`.`default`"""
-        
-
-        sql """ set enable_text_validate_utf8 = true; """     
-
-        test {
-            sql """ select * from invalid_utf8_data """ 
-            exception """Only support csv data in utf8 codec"""
-        }
-        
-        
-        test {
-            sql """ select * from invalid_utf8_data2; """ 
-            exception """Only support csv data in utf8 codec"""
-        }
-
-
-        def uri = "${defaultFS}" + "/user/doris/preinstalled_data/text/utf8_check/utf8_check_fail.csv"
-
-        
-        test {
-            sql """ desc function  HDFS(
-                "uri" = "${uri}",
-                "hadoop.username" = "${hdfsUserName}",            
-                "format" = "csv",
-                "column_separator"=",")"""    
-            exception """Only support csv data in utf8 codec"""
-        }
-
-        test {
-            sql """select * from HDFS(
-                "uri" = "${uri}",
-                "hadoop.username" = "${hdfsUserName}",            
-                "format" = "csv",
-                "column_separator"=",")"""    
-            exception """Only support csv data in utf8 codec"""
-        }
-    
-
-        sql """ set enable_text_validate_utf8 = false; """     
-
-        qt_1 """select * from invalid_utf8_data order by id """ 
-    
-        qt_2 """ desc function  HDFS(
-                "uri" = "${uri}",
-                "hadoop.username" = "${hdfsUserName}",            
-                "format" = "csv",
-                "column_separator"=",")"""    
-
-
-        qt_3 """select * from   HDFS(
-                "uri" = "${uri}",
-                "hadoop.username" = "${hdfsUserName}",            
-                "format" = "csv",
-                "column_separator"=",") order by c1"""    
-        qt_4 """select * from invalid_utf8_data2 order by id """ 
-    
-    
-    }
-
-}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
