wgtmac commented on code in PR #108: URL: https://github.com/apache/iceberg-cpp/pull/108#discussion_r2106412663
########## src/iceberg/table_metadata.cc: ########## @@ -153,14 +154,70 @@ Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName( return MetadataFileCodecType::kNone; } +class GZipDecompressor { + public: + GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); } + + ~GZipDecompressor() { + if (initialized_) { + inflateEnd(&stream_); + } + } + + Status Init() { + int ret = inflateInit2(&stream_, 15 + 32); Review Comment: What does the magic number mean? Should we define it as a constant? ########## src/iceberg/table_metadata.cc: ########## @@ -153,14 +154,70 @@ Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName( return MetadataFileCodecType::kNone; } +class GZipDecompressor { + public: + GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); } + + ~GZipDecompressor() { + if (initialized_) { + inflateEnd(&stream_); + } + } + + Status Init() { + int ret = inflateInit2(&stream_, 15 + 32); + if (ret != Z_OK) { + return IOError("inflateInit2 failed, result:{}", ret); Review Comment: nit: use a new `DecompressError` or `GZipError` ########## src/iceberg/table_metadata.cc: ########## @@ -153,14 +154,70 @@ Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName( return MetadataFileCodecType::kNone; } +class GZipDecompressor { + public: + GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); } + + ~GZipDecompressor() { + if (initialized_) { + inflateEnd(&stream_); + } + } + + Status Init() { + int ret = inflateInit2(&stream_, 15 + 32); + if (ret != Z_OK) { + return IOError("inflateInit2 failed, result:{}", ret); + } + initialized_ = true; + return {}; + } + + Result<std::string> Decompress(const std::string& compressed_data) { + if (compressed_data.empty()) { + return {}; + } + if (!initialized_) { + ICEBERG_RETURN_UNEXPECTED(Init()); + } + stream_.avail_in = static_cast<uInt>(compressed_data.size()); + stream_.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(compressed_data.data())); + + // TODO(xiao.dong) magic buffer, can we get a estimated size from compressed data? + std::vector<char> outBuffer(32 * 1024); Review Comment: ```suggestion std::vector<char> out_buffer(32 * 1024); ``` ########## src/iceberg/table_metadata.cc: ########## @@ -153,14 +154,70 @@ Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName( return MetadataFileCodecType::kNone; } +class GZipDecompressor { Review Comment: Should we just define it as a function because it is just an one-off thing? Perhaps declare it in `gzip_internal.h` so we can test it individually? ########## src/iceberg/table_metadata.cc: ########## @@ -153,14 +154,70 @@ Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName( return MetadataFileCodecType::kNone; } +class GZipDecompressor { + public: + GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); } + + ~GZipDecompressor() { + if (initialized_) { + inflateEnd(&stream_); + } + } + + Status Init() { + int ret = inflateInit2(&stream_, 15 + 32); + if (ret != Z_OK) { + return IOError("inflateInit2 failed, result:{}", ret); + } + initialized_ = true; + return {}; + } + + Result<std::string> Decompress(const std::string& compressed_data) { + if (compressed_data.empty()) { + return {}; + } + if (!initialized_) { + ICEBERG_RETURN_UNEXPECTED(Init()); + } + stream_.avail_in = static_cast<uInt>(compressed_data.size()); + stream_.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(compressed_data.data())); + + // TODO(xiao.dong) magic buffer, can we get a estimated size from compressed data? + std::vector<char> outBuffer(32 * 1024); + std::string result; + int ret = 0; + do { + outBuffer.resize(outBuffer.size()); Review Comment: Isn't it redundant? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org