Nation #
----- 建议通过 Spark-SQL 执行
-- Recreate the nation table as a Hudi copy-on-write table keyed on n_nationkey.
-- NOTE(review): the LOCATION path contains tcph while the other paths in these
-- notes use tpch. It matches the --target-base-path of the load command, so
-- rename both together or not at all.
DROP TABLE IF EXISTS nation;

CREATE TABLE nation (
    n_nationkey INT          NOT NULL,
    n_name      VARCHAR(25)  NOT NULL,
    n_regionkey INT          NOT NULL,
    n_comment   VARCHAR(256)
)
USING hudi
OPTIONS (
    type       = 'cow',
    primaryKey = 'n_nationkey'
)
LOCATION 'file:///svr/data/hudi/tcph/nation';
----- Hudi相关配置项,也可以在如下命令行中通过 --hoodie-conf 配置
# cat /svr/hudi/tpch/nation.properties
# Record key column, same column as the primaryKey declared for the table
hoodie.datasource.write.recordkey.field=n_nationkey
# CSV input: directory to read, pipe-delimited, no header row
hoodie.streamer.source.dfs.root=file:///svr/hudi/tpch/nation/
hoodie.streamer.csv.sep=|
hoodie.streamer.csv.header=false
# Source and target both use the same Avro schema file
hoodie.streamer.schemaprovider.source.schema.file=file:///svr/hudi/tpch/nation.avsc
hoodie.streamer.schemaprovider.target.schema.file=file:///svr/hudi/tpch/nation.avsc
# Turn off the embedded timeline server
hoodie.embed.timeline.server=false
----- 表结构信息
# cat /svr/hudi/tpch/nation.avsc
{
  "type": "record",
  "name": "nation",
  "fields": [
    {
      "name": "n_nationkey",
      "type": "int"
    },
    {
      "name": "n_name",
      "type": "string"
    },
    {
      "name": "n_regionkey",
      "type": "int"
    },
    {
      "name": "n_comment",
      "type": "string"
    }
  ]
}
----- 执行数据导入
----- The master URL is quoted so the shell cannot expand [2] as a glob pattern
# spark-submit --master 'local[2]' --deploy-mode client --driver-memory 20g \
    --class org.apache.hudi.utilities.streamer.HoodieStreamer \
    /svr/hudi/hudi-utilities-bundle_2.12-0.14.1.jar \
    --props file:///svr/hudi/tpch/nation.properties \
    --source-class org.apache.hudi.utilities.sources.CsvDFSSource \
    --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
    --table-type COPY_ON_WRITE --op BULK_INSERT \
    --target-base-path file:///svr/data/hudi/tcph/nation \
    --target-table tpch.nation