nastra commented on code in PR #9902: URL: https://github.com/apache/iceberg/pull/9902#discussion_r1528029346
########## spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderWithBloomFilter.java: ########## @@ -174,170 +156,62 @@ public static Object[][] parameters() { @BeforeAll public static void startMetastoreAndSpark() { - metastore = new TestHiveMetastore(); - metastore.start(); - HiveConf hiveConf = metastore.hiveConf(); - spark = SparkSession.builder() .master("local[2]") - .config("spark.hadoop." + METASTOREURIS.varname, hiveConf.get(METASTOREURIS.varname)) - .enableHiveSupport() + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.local.type", "hadoop") + .config("spark.sql.catalog.local.warehouse", temp.toString()) + .config("spark.sql.defaultCatalog", "local") .getOrCreate(); - catalog = - (HiveCatalog) - CatalogUtil.loadCatalog( - HiveCatalog.class.getName(), "hive", ImmutableMap.of(), hiveConf); - - try { - catalog.createNamespace(Namespace.of("default")); - } catch (AlreadyExistsException ignored) { - // the default namespace already exists. ignore the create error - } + spark.sql("CREATE DATABASE IF NOT EXISTS default"); + spark.sql("USE default"); } @AfterAll - public static void stopMetastoreAndSpark() throws Exception { - catalog = null; - metastore.stop(); - metastore = null; + public static void stopMetastoreAndSpark() { spark.stop(); spark = null; } - protected void createTable(String name, Schema schema) { - table = catalog.createTable(TableIdentifier.of("default", name), schema); - TableOperations ops = ((BaseTable) table).operations(); - TableMetadata meta = ops.current(); - ops.commit(meta, meta.upgradeToFormatVersion(2)); + protected void createTable(String name) throws TableAlreadyExistsException { + Dataset<Row> emptyDf = spark.createDataFrame(Lists.newArrayList(), schema); + CreateTableWriter<Row> createTableWriter = emptyDf.writeTo("default." 
+ name); if (useBloomFilter) { - table - .updateProperties() - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_long", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_double", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_float", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_string", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_boolean", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_date", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_int_decimal", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_long_decimal", "true") - .set(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id_fixed_decimal", "true") - .commit(); + String[] columns = { + "id", + "id_long", + "id_double", + "id_float", + "id_string", + "id_boolean", + "id_date", + "id_int_decimal", + "id_long_decimal", + "id_fixed_decimal", + "id_nested.nested_id" + }; + for (String column : columns) { + createTableWriter.tableProperty( + PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + column, "true"); + } } - table - .updateProperties() - .set(TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES, "100") // to have multiple row groups - .commit(); - if (vectorized) { - table - .updateProperties() - .set(TableProperties.PARQUET_VECTORIZATION_ENABLED, "true") - .set(TableProperties.PARQUET_BATCH_SIZE, "4") - .commit(); - } - } - - protected void dropTable(String name) { - catalog.dropTable(TableIdentifier.of("default", name)); - } + createTableWriter.tableProperty(PARQUET_ROW_GROUP_SIZE_BYTES, "100"); - private DataFile writeDataFile(OutputFile out, StructLike partition, List<Record> rows) Review Comment: this seems like too many changes just to add a single test. This makes it quite difficult to review the diffset -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org For additional commands, e-mail: issues-help@iceberg.apache.org