jiayuasu commented on code in PR #2776:
URL: https://github.com/apache/sedona/pull/2776#discussion_r2982611075


##########
spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala:
##########
@@ -232,6 +232,112 @@ class OsmReaderTest extends TestBaseScala with Matchers {
       relationsList should contain theSameElementsAs expectedRelationsList
     }
 
+    it("should parse metadata fields (changeset, timestamp, uid, user, version)") {
+      val osmData = sparkSession.read
+        .format("osmpbf")
+        .load(monacoPath)
+
+      // All entities should have version, timestamp, changeset populated
+      val totalCount = osmData.count()
+      val withMetadata = osmData
+        .filter("version is not null and timestamp is not null and changeset is not null")
+        .count()
+
+      withMetadata shouldEqual totalCount
+
+      // Verify timestamp values are reasonable (after year 2000, before year 2100)
+      val timestamps = osmData
+        .selectExpr("min(timestamp)", "max(timestamp)")
+        .collect()
+        .head
+
+      val minTimestamp = timestamps.getTimestamp(0)
+      val maxTimestamp = timestamps.getTimestamp(1)
+
+      minTimestamp.after(java.sql.Timestamp.valueOf("2000-01-01 00:00:00")) shouldBe true
+      maxTimestamp.before(java.sql.Timestamp.valueOf("2100-01-01 00:00:00")) shouldBe true
+
+      // Verify version is positive
+      val minVersion = osmData
+        .selectExpr("min(version)")
+        .collect()
+        .head
+        .getInt(0)
+
+      minVersion should be >= 1
+
+      // Verify changeset is non-negative
+      val minChangeset = osmData
+        .selectExpr("min(changeset)")
+        .collect()
+        .head
+        .getLong(0)
+
+      minChangeset should be >= 0L
+
+      // Verify metadata works for each entity kind
+      for (kind <- Seq("node", "way", "relation")) {
+        val kindData = osmData.filter(s"kind == '$kind'")
+        val kindWithMeta = kindData
+          .filter("version is not null and timestamp is not null")
+          .count()
+
+        kindWithMeta shouldEqual kindData.count()
+      }
+    }
+
+    it("should include metadata fields in schema for dense nodes") {
+      val denseData = sparkSession.read
+        .format("osmpbf")
+        .load(densePath)
+
+      // Verify schema includes the new fields
+      val fieldNames = denseData.schema.fieldNames
+      fieldNames should contain("changeset")
+      fieldNames should contain("timestamp")
+      fieldNames should contain("uid")
+      fieldNames should contain("user")
+      fieldNames should contain("version")
+      fieldNames should contain("visible")
+
+      // Verify that at least one dense node has populated metadata values
+      val nodeWithMetadata = denseData
+        .where(col("kind") === "node")
+        .select("changeset", "timestamp", "uid", "user", "version", "visible")
+        .head()
+
+      val changeset = nodeWithMetadata.getAs[Long]("changeset")
+      val timestamp = nodeWithMetadata.getAs[Long]("timestamp")
+      val uid = nodeWithMetadata.getAs[Long]("uid")
+      val user = nodeWithMetadata.getAs[String]("user")
+      val version = nodeWithMetadata.getAs[Long]("version")

Review Comment:
   Fixed. Reading timestamp as `java.sql.Timestamp`, and uid/version as `Int` 
to match the SchemaProvider types (TimestampType, IntegerType).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to