morningman commented on a change in pull request #3230: Support load json-data into Doris by RoutineLoad or StreamLoad
URL: https://github.com/apache/incubator-doris/pull/3230#discussion_r410710574
########## File path: docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md ########## @@ -301,6 +309,84 @@ under the License. "property.client.id" = "my_client_id" ); + 4. 为 example_db 的 example_tbl 创建一个名为 test1 的 Kafka 例行导入任务,导入的简单json数据。 + 1)数据样例, doris_data为固定关键字 + { + "doris_data":[ + {"category":"a9jadhx","author":"test","price":895}, + {"category":"axdfa1","author":"EvelynWaugh","price":1299} + ] + } + 2) 创建任务,可不设置jsonpath或者jsonpath_file + CREATE ROUTINE LOAD example_db.test1 ON example_tbl + COLUMNS(category, author, price) + PROPERTIES + ( + "desired_concurrent_number"="3", + "max_batch_interval" = "20", + "max_batch_rows" = "300000", + "max_batch_size" = "209715200", + "strict_mode" = "false", + "format" = "json" + ) + FROM KAFKA + ( + "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", + "kafka_topic" = "my_topic", + "kafka_partitions" = "0,1,2", + "kafka_offsets" = "0,0,0" + ); + + 5. 通过jsonpath参数,为 example_db 的 example_tbl 创建一个名为 test1 的 Kafka 例行导入任务,导入的数据格式为json。 + + CREATE ROUTINE LOAD example_db.test1 ON example_tbl + COLUMNS(category, author, price) + PROPERTIES + ( + "desired_concurrent_number"="3", + "max_batch_interval" = "20", + "max_batch_rows" = "300000", + "max_batch_size" = "209715200", + "strict_mode" = "false", + "format" = "json", + "jsonpath" = "{\"jsonpath\":[{\"column\":\"category\",\"value\":\"$.store.book.category\"},{\"column\":\"author\",\"value\":\"$.store.book.author\"},{\"column\":\"price\",\"value\":\"$.store.book.price\"}]}" + ) + FROM KAFKA + ( + "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", + "kafka_topic" = "my_topic", + "kafka_partitions" = "0,1,2", + "kafka_offsets" = "0,0,0" + ); + + 6. 
通过jsonpath_file参数, 为 example_db 的 example_tbl 创建一个名为 test1 的 Kafka 例行导入任务,导入的数据格式为json。 + 1)通过create file导入jsonpath文件到Doris集群中,然后通过show file查看文件id以及md5, 例如: + mysql> show file; + +-------+--------------------------+---------+-----------------+----------+-----------+----------------------------------+ + | Id | DbName | Catalog | FileName | FileSize | IsContent | MD5 | + +-------+--------------------------+---------+-----------------+----------+-----------+----------------------------------+ + | 43017 | default_cluster:mediavad | kafka | myjsonpath.json | 215 | true | 3f3ab257be8a422e0044abe5ed51d410 | + +-------+--------------------------+---------+-----------------+----------+-----------+----------------------------------+ + 2)创建导入任务 + CREATE ROUTINE LOAD example_db.test1 ON example_tbl + COLUMNS(category, author, price) + PROPERTIES + ( + "desired_concurrent_number"="3", + "max_batch_interval" = "20", + "max_batch_rows" = "300000", + "max_batch_size" = "209715200", + "strict_mode" = "false", + "format" = "json", + "jsonpath_file" = "43017:3f3ab257be8a422e0044abe5ed51d410" Review comment: should be: ```suggestion "jsonpath_file" = "FILE:myjsonpath.json" ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org