morningman commented on code in PR #51936: URL: https://github.com/apache/doris/pull/51936#discussion_r2160640784
########## fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java: ########## @@ -88,6 +88,13 @@ public static String getFieldDelimiter(Table table) { DEFAULT_FIELD_DELIMITER, fieldDelim, serFormat)); } + public static String getMultiDelimitFieldDelimiter(Table table) { Review Comment: Is this method the same as `getFieldDelimiter()`? ########## be/src/vec/exec/format/text/text_reader.cpp: ########## @@ -41,18 +44,62 @@ namespace doris::vectorized { void HiveTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>* splitted_values) { const char* data = line.data; const size_t size = line.size; - size_t value_start = 0; - for (size_t i = 0; i < size; ++i) { - if (data[i] == _value_sep[0]) { - // hive will escape the field separator in string - if (_escape_char != 0 && i > 0 && data[i - 1] == _escape_char) { - continue; + if (_value_sep_len == 1) { + size_t value_start = 0; + for (size_t i = 0; i < size; ++i) { + if (data[i] == _value_sep[0]) { + // hive will escape the field separator in string + if (_escape_char != 0 && i > 0 && data[i - 1] == _escape_char) { + continue; + } + process_value_func(data, value_start, i - value_start, _trimming_char, + splitted_values); + value_start = i + _value_sep_len; + } + } + process_value_func(data, value_start, size - value_start, _trimming_char, splitted_values); + } else { + size_t start = 0; Review Comment: Please add a unit test for this algorithm. ########## docker/thirdparties/docker-compose/hive/scripts/data/regression/multi_delimit_serde/create_table.hql: ########## @@ -0,0 +1,49 @@ +CREATE DATABASE IF NOT EXISTS regression; +USE regression; + +CREATE TABLE `multi_delimit_test`( Review Comment: How about adding array and map types to test 'mapkey.delim' and 'collection.delim' too? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org