wgtmac commented on code in PR #108:
URL: https://github.com/apache/iceberg-cpp/pull/108#discussion_r2106412663


##########
src/iceberg/table_metadata.cc:
##########
@@ -153,14 +154,70 @@ Result<MetadataFileCodecType> 
TableMetadataUtil::CodecFromFileName(
   return MetadataFileCodecType::kNone;
 }
 
+class GZipDecompressor {
+ public:
+  GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); }
+
+  ~GZipDecompressor() {
+    if (initialized_) {
+      inflateEnd(&stream_);
+    }
+  }
+
+  Status Init() {
+    int ret = inflateInit2(&stream_, 15 + 32);

Review Comment:
   What does the magic number mean? Should we define it as a constant?



##########
src/iceberg/table_metadata.cc:
##########
@@ -153,14 +154,70 @@ Result<MetadataFileCodecType> 
TableMetadataUtil::CodecFromFileName(
   return MetadataFileCodecType::kNone;
 }
 
+class GZipDecompressor {
+ public:
+  GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); }
+
+  ~GZipDecompressor() {
+    if (initialized_) {
+      inflateEnd(&stream_);
+    }
+  }
+
+  Status Init() {
+    int ret = inflateInit2(&stream_, 15 + 32);
+    if (ret != Z_OK) {
+      return IOError("inflateInit2 failed, result:{}", ret);

Review Comment:
   nit: use a new `DecompressError` or `GZipError`



##########
src/iceberg/table_metadata.cc:
##########
@@ -153,14 +154,70 @@ Result<MetadataFileCodecType> 
TableMetadataUtil::CodecFromFileName(
   return MetadataFileCodecType::kNone;
 }
 
+class GZipDecompressor {
+ public:
+  GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); }
+
+  ~GZipDecompressor() {
+    if (initialized_) {
+      inflateEnd(&stream_);
+    }
+  }
+
+  Status Init() {
+    int ret = inflateInit2(&stream_, 15 + 32);
+    if (ret != Z_OK) {
+      return IOError("inflateInit2 failed, result:{}", ret);
+    }
+    initialized_ = true;
+    return {};
+  }
+
+  Result<std::string> Decompress(const std::string& compressed_data) {
+    if (compressed_data.empty()) {
+      return {};
+    }
+    if (!initialized_) {
+      ICEBERG_RETURN_UNEXPECTED(Init());
+    }
+    stream_.avail_in = static_cast<uInt>(compressed_data.size());
+    stream_.next_in = 
reinterpret_cast<Bytef*>(const_cast<char*>(compressed_data.data()));
+
+    // TODO(xiao.dong) magic buffer, can we get a estimated size from 
compressed data?
+    std::vector<char> outBuffer(32 * 1024);

Review Comment:
   ```suggestion
       std::vector<char> out_buffer(32 * 1024);
   ```



##########
src/iceberg/table_metadata.cc:
##########
@@ -153,14 +154,70 @@ Result<MetadataFileCodecType> 
TableMetadataUtil::CodecFromFileName(
   return MetadataFileCodecType::kNone;
 }
 
+class GZipDecompressor {

Review Comment:
   Should we just define it as a function because it is just an one-off thing? 
Perhaps declare it in `gzip_internal.h` so we can test it individually?



##########
src/iceberg/table_metadata.cc:
##########
@@ -153,14 +154,70 @@ Result<MetadataFileCodecType> 
TableMetadataUtil::CodecFromFileName(
   return MetadataFileCodecType::kNone;
 }
 
+class GZipDecompressor {
+ public:
+  GZipDecompressor() { memset(&stream_, 0, sizeof(stream_)); }
+
+  ~GZipDecompressor() {
+    if (initialized_) {
+      inflateEnd(&stream_);
+    }
+  }
+
+  Status Init() {
+    int ret = inflateInit2(&stream_, 15 + 32);
+    if (ret != Z_OK) {
+      return IOError("inflateInit2 failed, result:{}", ret);
+    }
+    initialized_ = true;
+    return {};
+  }
+
+  Result<std::string> Decompress(const std::string& compressed_data) {
+    if (compressed_data.empty()) {
+      return {};
+    }
+    if (!initialized_) {
+      ICEBERG_RETURN_UNEXPECTED(Init());
+    }
+    stream_.avail_in = static_cast<uInt>(compressed_data.size());
+    stream_.next_in = 
reinterpret_cast<Bytef*>(const_cast<char*>(compressed_data.data()));
+
+    // TODO(xiao.dong) magic buffer, can we get a estimated size from 
compressed data?
+    std::vector<char> outBuffer(32 * 1024);
+    std::string result;
+    int ret = 0;
+    do {
+      outBuffer.resize(outBuffer.size());

Review Comment:
   Isn't it redundant?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to