作者:JohnBeanLee | 来源:互联网 | 2023-10-11 10:36
一个同学在群里问的这个问题,刚好好久没写过window 的sql了,玩一玩
手上没有环境,一起从简了
使用 datagen 生成数据,id 随机生成,最小值
输出直接到 console 窗口
-- flink window tvf calc pv&uv
create table if not exists datagen_source (
id int
,name string
,sex string
,age int
,birthday string
,proc_time as proctime()
) with (
'connector' = 'datagen'
,'rows-per-second' = '10000'
,'fields.id.kind' = 'random'
,'fields.id.min' = '1'
,'fields.id.max' = '2000000'
);
create table if not exists print_sink(
start_time string
,end_time string
,pv bigint
,uv bigint
) with (
'connector' = 'print'
);
insert into print_sink
select
date_format(window_start, 'HH:mm:ss')
, date_format(window_end, 'HH:mm:ss')
, count(id)
, count(distinct id)
FROM TABLE(
TUMBLE(TABLE datagen_source, DESCRIPTOR(proc_time), INTERVAL '10' SECOND ))
GROUP BY window_start, window_end
union all
select
date_format(window_start, 'HH:mm:ss')
, date_format(window_end, 'HH:mm:ss')
, count(id)
, count(distinct id)
FROM TABLE(
TUMBLE(TABLE datagen_source, DESCRIPTOR(proc_time), INTERVAL '20' SECOND ))
GROUP BY window_start, window_end
union all
select
date_format(window_start, 'HH:mm:ss')
, date_format(window_end, 'HH:mm:ss')
, count(id)
, count(distinct id)
FROM TABLE(
TUMBLE(TABLE datagen_source, DESCRIPTOR(proc_time), INTERVAL '30' SECOND ))
GROUP BY window_start, window_end
;
查看结果:
+I[10:45:00, 10:45:20, 20000, 19900] # 20 s
+I[10:45:10, 10:45:20, 20000, 19913] # 10 s
+I[10:45:00, 10:45:30, 120000, 116420] # 30 s
+I[10:45:20, 10:45:30, 100000, 97497]
+I[10:45:30, 10:45:40, 100000, 97558]
+I[10:45:20, 10:45:40, 200000, 190314]