morningman commented on a change in pull request #4136: URL: https://github.com/apache/incubator-doris/pull/4136#discussion_r458546950
########## File path: be/src/exec/json_scanner.h ########## @@ -106,33 +105,31 @@ struct JsonPath; class JsonReader { public: JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, FileReader* file_reader, - std::string& jsonpath, bool strip_outer_array); + bool strip_outer_array); ~JsonReader(); - Status init(); // must call before use + Status init(std::string& jsonpath, std::string& json_root); // must call before use Status read(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof); private: + Status (JsonReader::*_handle_json_callback)(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof); Status _handle_simple_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof); - Status _handle_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof); Status _handle_flat_array_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof); Status _handle_nested_complex_json(Tuple* tuple, const std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof); void _fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len); - void _assemble_jmap(const std::vector<SlotDescriptor*>& slot_descs); + int _assemble_jmap(const std::vector<SlotDescriptor*>& slot_descs); Review comment: Remove this method ########## File path: be/src/exec/json_scanner.cpp ########## @@ -197,30 +201,52 @@ JsonReader::~JsonReader() { _close(); } -Status JsonReader::init() { +Status JsonReader::init(std::string& jsonpath, std::string& json_root) { // parse jsonpath + if (!jsonpath.empty()) { + Status st = _generater_json_paths(jsonpath, _parsed_jsonpaths); Review comment: ```suggestion Status st = _generate_json_paths(jsonpath, _parsed_jsonpaths); ``` ########## File path: be/src/exec/json_scanner.cpp ########## @@ -197,30 +201,52 @@ JsonReader::~JsonReader() { _close(); } -Status JsonReader::init() { +Status JsonReader::init(std::string& jsonpath, std::string& json_root) { // parse jsonpath + if (!jsonpath.empty()) { + Status st = _generater_json_paths(jsonpath, _parsed_jsonpaths); + RETURN_IF_ERROR(st); + } + if (!json_root.empty()) { + std::vector<JsonPath> parsed_paths; + JsonFunctions::parse_json_paths(json_root, &parsed_paths); + _parsed_json_root.push_back(parsed_paths); Review comment: We only support one json root, so we can check it here, and return error if json root more than one. ########## File path: be/src/exec/json_scanner.h ########## @@ -106,33 +105,31 @@ struct JsonPath; class JsonReader { public: JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, FileReader* file_reader, - std::string& jsonpath, bool strip_outer_array); + bool strip_outer_array); ~JsonReader(); - Status init(); // must call before use + Status init(std::string& jsonpath, std::string& json_root); // must call before use Review comment: Can these parameters be changed to const &? ########## File path: be/src/exec/json_scanner.cpp ########## @@ -257,26 +283,38 @@ Status JsonReader::_parse_json_doc(bool* eof) { delete[] json_str; return Status::DataQualityError(str_error.str()); } + delete[] json_str; - if (_json_doc.IsArray() && !_strip_outer_array) { - delete[] json_str; + // set json root + if (_parsed_json_root.size() != 0) { Review comment: Why _parsed_json_root is a vector? ########## File path: be/src/exec/json_scanner.h ########## @@ -144,9 +141,9 @@ class JsonReader { RuntimeProfile::Counter* _bytes_read_counter; RuntimeProfile::Counter* _read_timer; std::vector<std::vector<JsonPath>> _parsed_jsonpaths; + std::vector<std::vector<JsonPath>> _parsed_json_root; rapidjson::Document _json_doc; - //key: column name - std::unordered_map<std::string, JsonDataInternal> _jmap; + rapidjson::Value *_json_root; Review comment: change the name of `_json_root`. This name is easily confused with `_parsed_json_root`. My suggestion: `_final_json_doc`. And add comment to explain that this is generated from `_json_doc` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org