ndrluis opened a new issue, #495:
URL: https://github.com/apache/iceberg-rust/issues/495

   I'm testing using the [iceberg rest 
image](https://github.com/tabular-io/iceberg-rest-image) from Tabular as a 
catalog.
   
   Here's the docker-compose.yml file:
   
   ```yaml
   version: '3.8'
   
   services:
     rest:
       image: tabulario/iceberg-rest:0.10.0
       environment:
         - AWS_ACCESS_KEY_ID=admin
         - AWS_SECRET_ACCESS_KEY=password
         - AWS_REGION=us-east-1
         - CATALOG_WAREHOUSE=s3://warehouse/
         - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
         - CATALOG_S3_ENDPOINT=http://minio:9000
       depends_on:
         - minio
       ports:
         - "8181:8181"
       networks:
         iceberg_net:
   
     minio:
       image: minio/minio:RELEASE.2024-03-07T00-43-48Z
       environment:
         - MINIO_ROOT_USER=admin
         - MINIO_ROOT_PASSWORD=password
         - MINIO_DOMAIN=minio
       networks:
         iceberg_net:
           aliases:
             - warehouse.minio
       expose:
         - 9001
         - 9000
       ports:
         - "9000:9000"
         - "9001:9001"
       command: [ "server", "/data", "--console-address", ":9001" ]
   
     mc:
       depends_on:
         - minio
       image: minio/mc:RELEASE.2024-03-07T00-31-49Z
       environment:
         - AWS_ACCESS_KEY_ID=admin
         - AWS_SECRET_ACCESS_KEY=password
         - AWS_REGION=us-east-1
       entrypoint: >
         /bin/sh -c "
           until (/usr/bin/mc config host add minio http://minio:9000 admin password) do
             echo '...waiting...' && sleep 1;
           done;
           /usr/bin/mc mb minio/warehouse;
           /usr/bin/mc policy set public minio/warehouse;
           tail -f /dev/null
         "
       networks:
         iceberg_net:
   
   networks:
     iceberg_net:
   ```
   
   I created some data with PyIceberg:
   
   ```python
   from pyiceberg.catalog import load_catalog
   import pyarrow as pa
   from pyiceberg.schema import Schema
   from pyiceberg.types import NestedField, StringType, DoubleType
   
   # Connect to the REST catalog exposed on localhost:8181; the S3 endpoint and
   # credentials are passed explicitly as catalog properties.
   catalog = load_catalog(
       "demo",
       **{
           "type": "rest",
           "uri": "http://localhost:8181",
           "s3.endpoint": "http://localhost:9000",
           "s3.access-key-id": "admin",
           "s3.secret-access-key": "password",
           "warehouse": "demo",
       },
   )
   
   catalog.create_namespace_if_not_exists("default")
   
   # Three optional columns: a city name plus its latitude/longitude.
   schema = Schema(
       NestedField(1, "city", StringType(), required=False),
       NestedField(2, "lat", DoubleType(), required=False),
       NestedField(3, "long", DoubleType(), required=False),
   )
   
   tbl = catalog.create_table_if_not_exists("default.cities", schema=schema)
   
   # Four sample rows; the read-back checks below expect this count.
   df = pa.Table.from_pylist(
       [
           {"city": "Amsterdam", "lat": 52.371807, "long": 4.896029},
           {"city": "San Francisco", "lat": 37.773972, "long": -122.431297},
           {"city": "Drachten", "lat": 53.11254, "long": 6.0989},
           {"city": "Paris", "lat": 48.864716, "long": 2.349014},
       ],
   )
   
   # Write the rows into the `default.cities` table.
   tbl.append(df)
   ```
   
   And queried with PyIceberg to verify if it's okay:
   
   ```python
   from pyiceberg.catalog import load_catalog
   from pyiceberg.table import Table
   
   # Connect to the same REST catalog, again passing the S3 endpoint and
   # credentials explicitly as catalog properties.
   catalog = load_catalog(
       "demo",
       **{
           "type": "rest",
           "uri": "http://localhost:8181/",
           "s3.endpoint": "http://localhost:9000",
           "s3.access-key-id": "admin",
           "s3.secret-access-key": "password",
           "warehouse": "demo",
       },
   )
   
   tbl: Table = catalog.load_table("default.cities")
   
   # Full table scan materialized as an Arrow table.
   res = tbl.scan().to_arrow()
   
   # Print the number of rows read back.
   print(len(res))
   ```
   
   It returns 4.
   
   And then with the Rust implementation:
   
   ```rust
   use std::collections::HashMap;
   
   use futures::TryStreamExt;
   use iceberg::{
       io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY},
       Catalog, TableIdent,
   };
   use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
   
   // Reproduction program: loads the `default.cities` table through the REST
   // catalog, scans it, and prints how many Arrow batches the scan yields.
   // Every fallible step is unwrapped, so any error aborts with a panic.
   #[tokio::main]
   async fn main() {
       // Create catalog: REST endpoint on localhost:8181, warehouse "demo".
       // The S3 endpoint, credentials and region are supplied explicitly as
       // properties rather than taken from the catalog's config response.
       let config = RestCatalogConfig::builder()
           .uri("http://localhost:8181".to_string())
           .warehouse("demo".to_string())
           .props(HashMap::from([
               (S3_ENDPOINT.to_string(), "http://localhost:9000".to_string()),
               (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
               (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
               (S3_REGION.to_string(), "us-east-1".to_string()),
           ]))
           .build();
   
       let catalog = RestCatalog::new(config);
   
       // Load the table identified by namespace "default", name "cities".
       let table = catalog
           .load_table(&TableIdent::from_strs(["default", "cities"]).unwrap())
           .await
           .unwrap();
   
       // Build a scan that selects all columns, then request its Arrow output.
       let scan = table.scan().select_all().build().unwrap();
       let batch_stream = scan.to_arrow().await.unwrap();
   
       // Dump the scan's Debug representation for diagnosis (consumes `scan`).
       dbg!(scan);
   
       // Drain the stream into a Vec; the first error, if any, panics here.
       let batches: Vec<_> = batch_stream.try_collect().await.unwrap();
   
       // Number of collected batches (not rows).
       dbg!(batches.len());
   }
   ```
   
   It's returning nothing.
   
   
   We have to define the S3 configurations because the Tabular image does not 
return the S3 credentials during the get config process.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to