morningman commented on a change in pull request #3230: Support load json-data into Doris by RoutineLoad or StreamLoad
URL: https://github.com/apache/incubator-doris/pull/3230#discussion_r410713892
########## File path: be/src/exprs/json_functions.cpp ########## @@ -209,31 +162,138 @@ rapidjson::Value* JsonFunctions::get_json_object( } else if (root->IsObject()){ if (!root->HasMember(col.c_str())) { root->SetNull(); + continue; } else { root = &((*root)[col.c_str()]); } } else { // root is not a nested type, return NULL root->SetNull(); + continue; } } if (UNLIKELY(index != -1)) { // judge the rapidjson:Value, which base the top's result, // if not array return NULL;else get the index value from the array if (root->IsArray()) { - if (root->IsNull() || index >= root->Size()) { + if (root->IsNull()) { + root->SetNull(); + continue; + } else if (index == -2) { + // [*] + array_obj = static_cast<rapidjson::Value*>( + document->GetAllocator().Malloc(sizeof(rapidjson::Value))); + array_obj->SetArray(); + + for (int j = 0; j < root->Size(); j++) { + rapidjson::Value v; + v.CopyFrom((*root)[j], document->GetAllocator()); + array_obj->PushBack(v, document->GetAllocator()); + } + root = array_obj; + // is_arr_set_by_last = true; + } else if (index >= root->Size()) { root->SetNull(); + continue; } else { root = &((*root)[index]); } } else { root->SetNull(); + continue; } } - } + } + return root; +} + +rapidjson::Value* JsonFunctions::get_json_object( + FunctionContext* context, + const std::string& json_string, + const std::string& path_string, + const JsonFunctionType& fntype, + rapidjson::Document* document) { + + // split path by ".", and escape quota by "\" + // eg: + // '$.text#abc.xyz' -> [$, text#abc, xyz] + // '$."text.abc".xyz' -> [$, text.abc, xyz] + // '$."text.abc"[1].xyz' -> [$, text.abc[1], xyz] + std::vector<JsonPath>* parsed_paths; + std::vector<JsonPath> tmp_parsed_paths; +#ifndef BE_TEST + parsed_paths = reinterpret_cast<std::vector<JsonPath>*>(context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + if (parsed_paths == nullptr) { + boost::tokenizer<boost::escaped_list_separator<char> > tok(path_string, boost::escaped_list_separator<char>("\\", 
".", "\"")); + std::vector<std::string> paths(tok.begin(), tok.end()); + get_parsed_paths(paths, &tmp_parsed_paths); + parsed_paths = &tmp_parsed_paths; + } +#else + boost::tokenizer<boost::escaped_list_separator<char> > tok(path_string, boost::escaped_list_separator<char>("\\", ".", "\"")); + std::vector<std::string> paths(tok.begin(), tok.end()); + get_parsed_paths(paths, &tmp_parsed_paths); + parsed_paths = &tmp_parsed_paths; +#endif + + VLOG(10) << "first parsed path: " << (*parsed_paths)[0].debug_string(); + + if (!(*parsed_paths)[0].is_valid) { + return document; + } + + if (UNLIKELY((*parsed_paths).size() == 1)) { + if (fntype == JSON_FUN_STRING) { + document->SetString(json_string.c_str(), document->GetAllocator()); + } else { + return document; + } + } - return root; + //rapidjson::Document document; + document->Parse(json_string.c_str()); + if (UNLIKELY(document->HasParseError())) { + VLOG(1) << "Error at offset " << document->GetErrorOffset() + << ": " << GetParseError_En(document->GetParseError()); + document->SetNull(); + return document; + } + return match_value(*parsed_paths, document); +} + + +rapidjson::Value* JsonFunctions::get_json_object_simple ( Review comment: ```suggestion rapidjson::Value* JsonFunctions::get_json_object_from_parsed_json( ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org