[GitHub] [incubator-doris] wyb commented on issue #3010: Spark load etl interface

GitBox Sun, 08 Mar 2020 19:36:12 -0700

wyb commented on issue #3010: Spark load etl interface
URL: 
https://github.com/apache/incubator-doris/issues/3010#issuecomment-596304885
 
 
   Updated the jobconfig.json file format:
   
   ```json
   {
        "tables": {
                "10014": {
                        "columns": {
                                "k1": {
                                        "default_value": "\\N",
                                        "column_type": "DATETIME",
                                        "is_allow_null": true
                                },
                                "k2": {
                                        "default_value": "0",
                                        "column_type": "SMALLINT",
                                        "is_allow_null": true
                                },
                                "v": {
                                        "default_value": "0",
                                        "column_type": "BIGINT",
                                        "is_allow_null": false
                                }
                        },
                        "indexes": {
                                "10014": {
                                        "column_refs": [{
                                                "name": "k1",
                                                "is_key": true,
                                                "aggregation_type": "NONE"
                                        }, {
                                                "name": "k2",
                                                "is_key": true,
                                                "aggregation_type": "NONE"
                                        }, {
                                                "name": "v",
                                                "is_key": false,
                                                "aggregation_type": "NONE"
                                        }],
                                        "distribution_column_refs": ["k1"],
                                        "schema_hash": 1294206574
                                },
                                "10017": {
                                        "column_refs": [{
                                                "name": "k1",
                                                "is_key": true,
                                                "aggregation_type": "NONE"
                                        }, {
                                                "name": "v",
                                                "is_key": false,
                                                "aggregation_type": "SUM"
                                        }],
                                        "distribution_column_refs": ["k1"],
                                        "schema_hash": 1294206575
                                }
                        },
                        "partition_info": {
                                "partition_type": "RANGE",
                                "partition_column_refs": ["k2"],
                                "partitions": {
                                        "10020": {
                                                "start_keys": [-100],
                                                "end_keys": [10],
                                                "is_max_partition": false,
                                                "bucket_num": 3
                                        }
                                }
                        },
                        "file_groups": [{
                                "partitions": [10020],
                                "file_paths": 
["hdfs://hdfs_host:port/user/palo/test/file"],
                                "file_field_names": ["tmp_k1", "k2"],
                                "value_separator": ",",
                                "line_delimiter": "\n",
                                "column_mappings": {
                                        "k1": {
                                                "function_name": "strftime",
                                                "args": ["%Y-%m-%d %H:%M:%S", 
"tmp_k1"]
                                        }
                                },
                                "where": "k2 > 10",
                                "is_negative": false,
                                "hive_table_name": "hive_db.table"
                        }]
                }
        },
        "output_path": 
"hdfs://hdfs_host:port/user/output/10003/label1/1582599203397",
        "output_file_pattern": 
"label1.%(table_id)d.%(partition_id)d.%(index_id)d.%(bucket)d.%(schema_hash)d"
   }
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[GitHub] [incubator-doris] wyb commented on issue #3010: Spark load etl interface

Reply via email to