目录
1.window function-窗口函数
2.其他可选API
3.代码演示
3.1 TimeWindow
3.2 CountWindow
1.window function-窗口函数
window function 定义了要对窗口中收集的数据做的计算操作,主要可以分为两类:
- 增量聚合函数(incremental aggregation functions) 每条数据到来就进行计算,保持一个简单的状态。典型的增量聚合函数有 ReduceFunction, AggregateFunction。(来一个处理一个)
- 全窗口函数(full window functions) 先把窗口所有数据收集起来,等到计算的时候会遍历所有数据。 ProcessWindowFunction和WindowFunction就是全窗口函数。(一个窗口一起处理)
2.其他可选API
- .trigger() —— 触发器 定义 window 什么时候关闭,触发计算并输出结果
- .evictor() —— 移除器 定义移除某些数据的逻辑
- .allowedLateness() —— 允许处理迟到的数据
- .sideOutputLateData() —— 将迟到的数据放入侧输出流
- .getSideOutput() —— 获取侧输出流
3.代码演示
3.1 TimeWindow时间窗口
package com.atguigu.apitest.window;import com.atguigu.apitest.beans.SensorReading;
import org.apache.commons.collections.IteratorUtils;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;import java.util.Arrays;
import java.util.Collections;public class WindowTest1_TimeWindow {public static void main(String[] args) throws Exception {StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();env.setParallelism(1);// // 从文件读取数据
// DataStream inputStream = env.readTextFile("D:\\Projects\\BigData\\FlinkTutorial\\src\\main\\resources\\sensor.txt");// socket文本流DataStream inputStream = env.socketTextStream("localhost", 7777);// 转换成SensorReading类型DataStream dataStream = inputStream.map(line -> {String[] fields = line.split(",");return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2]));});// 开窗测试// 1. 增量聚合函数DataStream resultStream = dataStream.keyBy("id")
// .countWindow(10, 2);
// .window(EventTimeSessionWindows.withGap(Time.minutes(1)));
// .window(TumblingProcessingTimeWindows.of(Time.seconds(15))).timeWindow(Time.seconds(15))//聚合统计总数.aggregate(new AggregateFunction() {@Overridepublic Integer createAccumulator() {return 0;}@Overridepublic Integer add(SensorReading value, Integer accumulator) {return accumulator + 1;}@Overridepublic Integer getResult(Integer accumulator) {return accumulator;}@Overridepublic Integer merge(Integer a, Integer b) {return a + b;}});// 2. 全窗口函数SingleOutputStreamOperator> resultStream2 = dataStream.keyBy("id").timeWindow(Time.seconds(15))
// .process(new ProcessWindowFunction() {
// }).apply(new WindowFunction, Tuple, TimeWindow>() {&#64;Overridepublic void apply(Tuple tuple, TimeWindow window, Iterable input, Collector> out) throws Exception {String id &#61; tuple.getField(0);Long windowEnd &#61; window.getEnd();Integer count &#61; IteratorUtils.toList(input.iterator()).size();out.collect(new Tuple3<>(id, windowEnd, count));}});// 3. 其它可选APIOutputTag outputTag &#61; new OutputTag("late") {};SingleOutputStreamOperator sumStream &#61; dataStream.keyBy("id").timeWindow(Time.seconds(15))
// .trigger()
// .evictor().allowedLateness(Time.minutes(1)).sideOutputLateData(outputTag).sum("temperature");sumStream.getSideOutput(outputTag).print("late");resultStream2.print();env.execute();}
}
3.2 CountWindow计数窗口
package com.atguigu.apitest.window;
import com.atguigu.apitest.beans.SensorReading;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;public class WindowTest2_CountWindow {public static void main(String[] args) throws Exception {StreamExecutionEnvironment env &#61; StreamExecutionEnvironment.getExecutionEnvironment();env.setParallelism(1);// // 从文件读取数据
// DataStream inputStream &#61; env.readTextFile("D:\\Projects\\BigData\\FlinkTutorial\\src\\main\\resources\\sensor.txt");// socket文本流DataStream inputStream &#61; env.socketTextStream("localhost", 7777);// 转换成SensorReading类型DataStream dataStream &#61; inputStream.map(line -> {String[] fields &#61; line.split(",");return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2]));});// 开计数窗口测试SingleOutputStreamOperator avgTempResultStream &#61; dataStream.keyBy("id").countWindow(10, 2).aggregate(new MyAvgTemp());avgTempResultStream.print();env.execute();}public static class MyAvgTemp implements AggregateFunction, Double>{&#64;Overridepublic Tuple2 createAccumulator() {return new Tuple2<>(0.0, 0);}&#64;Overridepublic Tuple2 add(SensorReading value, Tuple2 accumulator) {return new Tuple2<>(accumulator.f0 &#43; value.getTemperature(), accumulator.f1 &#43; 1);}&#64;Overridepublic Double getResult(Tuple2 accumulator) {return accumulator.f0 / accumulator.f1;}&#64;Overridepublic Tuple2 merge(Tuple2 a, Tuple2 b) {return new Tuple2<>(a.f0 &#43; b.f0, a.f1 &#43; b.f1);}}
}