spaces-X commented on code in PR #20327: URL: https://github.com/apache/doris/pull/20327#discussion_r1222530731
########## be/src/tools/build_segment_tool/build_helper.cpp: ########## @@ -0,0 +1,196 @@ +#include "tools/build_segment_tool/build_helper.h" + +#include <cstdlib> +#include <filesystem> +#include <fstream> +#include <iostream> +#include <set> +#include <sstream> +#include <string> + +#include "common/status.h" +#include "common/config.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" +#include "olap/tablet_meta.h" +#include "olap/tablet_manager.h" +#include "olap/tablet_schema_cache.h" +#include "olap/file_header.h" +#include "runtime/exec_env.h" +#include "tools/build_segment_tool/builder_scanner_memtable.h" +#include "util/disk_info.h" +#include "util/mem_info.h" + +namespace doris { + +BuildHelper* BuildHelper::_s_instance = nullptr; + +BuildHelper* BuildHelper::init_instance() { + // DCHECK(_s_instance == nullptr); + static BuildHelper instance; + _s_instance = &instance; + return _s_instance; +} + +void BuildHelper::initial_build_env() { + char doris_home[] = "DORIS_HOME=/tmp"; + putenv(doris_home); + + if (!doris::config::init(nullptr, true, false, true)) { + LOG(FATAL) << "init config fail"; + exit(-1); + } + CpuInfo::init(); + DiskInfo::init(); + MemInfo::init(); + // write buffer size before flush + config::write_buffer_size = 209715200; + // max buffer size used in memtable for the aggregated table + config::write_buffer_size_for_agg = 8194304000; + // CONF_mInt64(memtable_max_buffer_size, "8194304000"); + + // std::shared_ptr<doris::MemTrackerLimiter> process_mem_tracker = + // std::make_shared<doris::MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "Process"); + // doris::ExecEnv::GetInstance()->set_orphan_mem_tracker(process_mem_tracker); + // doris::thread_context()->thread_mem_tracker_mgr->attach_limiter_tracker(process_mem_tracker, + // TUniqueId()); + + // doris::thread_context()->thread_mem_tracker_mgr->init(); + // doris::thread_context()->thread_mem_tracker_mgr->set_check_limit(false); + 
doris::TabletSchemaCache::create_global_schema_cache(); + doris::ChunkAllocator::init_instance(4096); + +} + +void BuildHelper::open(const std::string& meta_file, const std::string& build_dir, + const std::string& data_path, const std::string& file_type) { + _meta_file = meta_file; + _build_dir = build_dir; + if (data_path.at(data_path.size() - 1) != '/') { + _data_path = data_path + "/"; + } else { + _data_path = data_path; + } + + _file_type = file_type; + + std::filesystem::path dir_path(std::filesystem::absolute(std::filesystem::path(build_dir))); + if (!std::filesystem::is_directory(std::filesystem::status(dir_path))) { + LOG(FATAL) << "build dir should be a directory"; + } + + // init and open storage engine + std::vector<doris::StorePath> paths; + auto olap_res = doris::parse_conf_store_paths(_build_dir, &paths); + if (!olap_res) { + LOG(FATAL) << "parse config storage path failed, path=" << doris::config::storage_root_path; + exit(-1); + } + doris::ExecEnv::init(doris::ExecEnv::GetInstance(), paths); + + doris::EngineOptions options; + options.store_paths = paths; + options.backend_uid = doris::UniqueId::gen_uid(); + doris::StorageEngine* engine = nullptr; + auto st = doris::StorageEngine::open(options, &engine); + if (!st.ok()) { + LOG(FATAL) << "fail to open StorageEngine, res=" << st; + exit(-1); + } +} + +std::string BuildHelper::read_local_file(const std::string& file) { Review Comment: Is this function useless? 
########## be/src/tools/build_segment_tool/build_helper.cpp: ########## @@ -0,0 +1,196 @@ +#include "tools/build_segment_tool/build_helper.h" + +#include <cstdlib> +#include <filesystem> +#include <fstream> +#include <iostream> +#include <set> +#include <sstream> +#include <string> + +#include "common/status.h" +#include "common/config.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" +#include "olap/tablet_meta.h" +#include "olap/tablet_manager.h" +#include "olap/tablet_schema_cache.h" +#include "olap/file_header.h" +#include "runtime/exec_env.h" +#include "tools/build_segment_tool/builder_scanner_memtable.h" +#include "util/disk_info.h" +#include "util/mem_info.h" + +namespace doris { + +BuildHelper* BuildHelper::_s_instance = nullptr; + +BuildHelper* BuildHelper::init_instance() { + // DCHECK(_s_instance == nullptr); + static BuildHelper instance; + _s_instance = &instance; + return _s_instance; +} + +void BuildHelper::initial_build_env() { + char doris_home[] = "DORIS_HOME=/tmp"; + putenv(doris_home); + + if (!doris::config::init(nullptr, true, false, true)) { + LOG(FATAL) << "init config fail"; + exit(-1); + } + CpuInfo::init(); + DiskInfo::init(); + MemInfo::init(); + // write buffer size before flush + config::write_buffer_size = 209715200; + // max buffer size used in memtable for the aggregated table + config::write_buffer_size_for_agg = 8194304000; + // CONF_mInt64(memtable_max_buffer_size, "8194304000"); + + // std::shared_ptr<doris::MemTrackerLimiter> process_mem_tracker = + // std::make_shared<doris::MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "Process"); + // doris::ExecEnv::GetInstance()->set_orphan_mem_tracker(process_mem_tracker); + // doris::thread_context()->thread_mem_tracker_mgr->attach_limiter_tracker(process_mem_tracker, + // TUniqueId()); + + // doris::thread_context()->thread_mem_tracker_mgr->init(); + // doris::thread_context()->thread_mem_tracker_mgr->set_check_limit(false); + 
doris::TabletSchemaCache::create_global_schema_cache(); + doris::ChunkAllocator::init_instance(4096); + +} + +void BuildHelper::open(const std::string& meta_file, const std::string& build_dir, + const std::string& data_path, const std::string& file_type) { + _meta_file = meta_file; + _build_dir = build_dir; + if (data_path.at(data_path.size() - 1) != '/') { + _data_path = data_path + "/"; + } else { + _data_path = data_path; + } + + _file_type = file_type; + + std::filesystem::path dir_path(std::filesystem::absolute(std::filesystem::path(build_dir))); + if (!std::filesystem::is_directory(std::filesystem::status(dir_path))) { + LOG(FATAL) << "build dir should be a directory"; + } + + // init and open storage engine + std::vector<doris::StorePath> paths; + auto olap_res = doris::parse_conf_store_paths(_build_dir, &paths); + if (!olap_res) { + LOG(FATAL) << "parse config storage path failed, path=" << doris::config::storage_root_path; + exit(-1); + } + doris::ExecEnv::init(doris::ExecEnv::GetInstance(), paths); + + doris::EngineOptions options; + options.store_paths = paths; + options.backend_uid = doris::UniqueId::gen_uid(); + doris::StorageEngine* engine = nullptr; + auto st = doris::StorageEngine::open(options, &engine); + if (!st.ok()) { + LOG(FATAL) << "fail to open StorageEngine, res=" << st; + exit(-1); + } +} + +std::string BuildHelper::read_local_file(const std::string& file) { + std::filesystem::path path(std::filesystem::absolute(std::filesystem::path(file))); + if (!std::filesystem::exists(path)) { LOG(FATAL) << "file not exist:" << file; + } + + std::ifstream f(path, std::ios::in | std::ios::binary); + const auto sz = std::filesystem::file_size(path); + std::string result(sz, '\0'); Review Comment: This will cause an out-of-memory error when the file is too large. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org