Hudi 示例样板

2024-05-20 warehouse hudi example

MoR

可以用来测试 RO/RT 等查询方式。测试发现:只有当新写入的数据与某个已有 Parquet 文件中的记录发生重叠(即更新了该文件中已存在的记录)时,才会生成 Log 文件;否则可能只是生成新的 FileGroup。

-- Merge-on-Read (MoR) example table.
-- Records are keyed by `id`; `ts` is the pre-combine field.
-- BIGINT is the standard spelling of the LONG type used in the original DDL.
CREATE TABLE hudi_mor (
    id    INT,
    name  STRING,
    price DOUBLE,
    ts    BIGINT
)
USING hudi
OPTIONS (
    type            = 'mor',
    primaryKey      = 'id',
    preCombineField = 'ts'
)
LOCATION 'file:///opt/data/hudi/example/mor';

-- Initialization: set the baseline parameter values for this session explicitly,
-- so the experiment does not depend on whatever the session had before.
SET hoodie.clean.automatic = false;
SET hoodie.compact.inline = true;
SET hoodie.compact.inline.max.delta.commits = 20;
SET hoodie.log.compaction.enable = false;
SET hoodie.log.compaction.inline = false;

-- Enable incremental (minor) log compaction.
-- These two statements override the two log-compaction values set just above.
SET hoodie.log.compaction.enable = true;
SET hoodie.log.compaction.inline = true;


-- Step 1: three separate inserts with disjoint keys (1-2, 3-4, 5-6).
INSERT INTO hudi_mor VALUES
    (1, 'foobar', 10.0, 1000),
    (2, 'hudi', 10.0, 1000);
INSERT INTO hudi_mor VALUES
    (3, 'hello', 10.0, 1000),
    (4, 'test', 10.0, 1000);
INSERT INTO hudi_mor VALUES
    (5, 'spark', 10.0, 1000),
    (6, 'flink', 10.0, 1000);
-- Checkpoint: at this point RO and RT reads return the same data.

-- Step 2: write keys 1 and 2 again, then key 1 once more.
INSERT INTO hudi_mor VALUES
    (1, 'jerry', 10.0, 1000),
    (2, 'hoodie', 10.0, 1000);
INSERT INTO hudi_mor VALUES
    (1, 'tom', 10.0, 1000);
-- Checkpoint: RT reads the latest data.

-- Step 3: overwrite the table contents, then insert keys 5 and 6 again.
INSERT OVERWRITE hudi_mor VALUES
    (1, 'jerry', 10.0, 1000),
    (2, 'hoodie', 10.0, 1000);
INSERT INTO hudi_mor VALUES
    (5, 'spark', 10.0, 1000),
    (6, 'flink', 10.0, 1000);
-- Checkpoint: the original data has been overwritten again.

不开启元数据

-- MoR example table created with 'hoodie.metadata.enable' = 'false',
-- i.e. with Hudi metadata disabled for this table.
-- NOTE(review): unlike the other examples, this LOCATION has no `file://` scheme,
-- so it presumably resolves against the session's default filesystem - confirm
-- that this is intended.
CREATE TABLE hudi_without_metadata(
  id INT,
  name STRING,
  price DOUBLE
)
USING hudi
OPTIONS (
  type = 'mor',
  primaryKey = 'id',
  'hoodie.metadata.enable' = 'false'
) LOCATION '/tmp/test/hudi_without_metadata';

-- Seed six rows into the table created above.
-- The target must be `hudi_without_metadata`; the previously used name
-- `hudi_without_metadata_table` is not created anywhere in this example.
INSERT INTO hudi_without_metadata VALUES
  (1, 'foobar', 10.0),
  (2, 'hudi', 10.0),
  (3, 'hello', 10.0),
  (4, 'test', 10.0),
  (5, 'spark', 10.0),
  (6, 'flink', 10.0);

表结构变更

-- Note: this setting cannot be placed in the CREATE TABLE OPTIONS clause,
-- so it is issued as a separate SET statement before the table is created.
SET hoodie.schema.on.read.enable = true;

-- MoR example table used to exercise schema changes.
CREATE TABLE hudi_schema_evolution (
    id    INT,
    name  STRING,
    price DOUBLE
)
USING hudi
OPTIONS (
    type                     = 'mor',
    primaryKey               = 'id',
    'hoodie.metadata.enable' = 'false'
)
LOCATION 'file:///opt/data/hudi/example/schema_evolution';

-- Seed rows written with the original three-column schema.
INSERT INTO hudi_schema_evolution VALUES
    (1, 'foobar', 10.0),
    (2, 'hudi', 10.0),
    (3, 'hello', 10.0);

-- Add a column, then write one row that supplies a value for it.
ALTER TABLE hudi_schema_evolution ADD COLUMNS (gender STRING);
INSERT INTO hudi_schema_evolution VALUES
    (4, 'test', 10.0, 'MALE');

-- Drop the column that was just added.
ALTER TABLE hudi_schema_evolution DROP COLUMN gender;

-- Rename a column.
ALTER TABLE hudi_schema_evolution RENAME COLUMN name TO names;

对于 StarRocks 来说,修改列名是不支持的。