作者:OP尋一 | 来源:互联网 | 2023-08-16 09:51
Flink读取数据的四种方式
- 准备工作
- 从集合中读取
- 从文件中读取
- 从Kafka中读取
- 自定义读取方式
准备工作
- Flink默认运行时是没有日志的,若想要出现日志,需要在pom文件添加
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>1.7.7</version>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>
- 在resource目录下创建文件log4j.properties,并且在文件中添加如下内容
log4j.rootLogger=info,console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.Threshold=INFO
log4j.appender.console.ImmediateFlush=true
log4j.appender.console.Target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=[%-5p] %d(%r) --> [%t] %l: %m %x %n
package beans;
/**
 * Plain data holder for a single sensor measurement.
 *
 * <p>The class is public, has a public no-argument constructor and exposes
 * every field through a getter/setter pair.
 */
public class SenSorReading {

    /** Identifier of the sensor that produced the reading. */
    private String id;

    /** Timestamp of the reading, as supplied by the caller. */
    private Long timeStamp;

    /** Measured temperature value. */
    private Double temperature;

    /** Creates an empty reading; all fields are {@code null}. */
    public SenSorReading() {
    }

    /**
     * Creates a fully populated reading.
     *
     * @param id          sensor identifier
     * @param timeStamp   timestamp of the reading
     * @param temperature measured temperature
     */
    public SenSorReading(String id, Long timeStamp, Double temperature) {
        this.id = id;
        this.timeStamp = timeStamp;
        this.temperature = temperature;
    }

    /** @return the sensor identifier */
    public String getId() {
        return id;
    }

    /** @param id the sensor identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the timestamp of the reading */
    public Long getTimeStamp() {
        return timeStamp;
    }

    /** @param timeStamp the timestamp of the reading */
    public void setTimeStamp(Long timeStamp) {
        this.timeStamp = timeStamp;
    }

    /** @return the measured temperature */
    public Double getTemperature() {
        return temperature;
    }

    /** @param temperature the measured temperature */
    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    /**
     * Renders the reading as
     * {@code SenSorReading{id='...', timeStamp=..., temperature=...}}.
     */
    @Override
    public String toString() {
        return "SenSorReading{"
                + "id='" + id + '\''
                + ", timeStamp=" + timeStamp
                + ", temperature=" + temperature
                + '}';
    }
}
跳转顶部
从集合中读取
package source;import beans.SenSorReading;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Arrays;

/**
 * Demo job that builds two streams from in-memory data: one from a collection
 * of {@link SenSorReading} objects and one from a fixed set of integers.
 * Both streams are printed with a label prefix.
 */
public class SourceCollection01 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism is set to 1 for the whole job.
        environment.setParallelism(1);

        // Source 1: a fixed collection of sensor readings.
        DataStream<SenSorReading> readings = environment.fromCollection(
                Arrays.asList(
                        new SenSorReading("sensor_1", 1547718199L, 35.8),
                        new SenSorReading("sensor_6", 1547718201L, 15.4),
                        new SenSorReading("sensor_7", 1547718202L, 6.7),
                        new SenSorReading("sensor_10", 1547718205L, 38.1)));

        // Source 2: individual elements passed as varargs.
        DataStreamSource<Integer> numbers = environment.fromElements(1, 2, 3, 4, 5);

        readings.print("data");
        numbers.print("int");

        environment.execute("dataFromCollection");
    }
}
- 结果展示
跳转顶部
从文件中读取
sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
package source;import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Demo job that reads {@code src/main/resources/sensor.txt} line by line as a
 * stream of strings and prints every line.
 */
public class SourceFile01 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism is set to 1 for the whole job.
        environment.setParallelism(1);

        // The path is relative to the working directory of the process.
        DataStreamSource<String> lines =
                environment.readTextFile("src/main/resources/sensor.txt");
        lines.print();

        environment.execute();
    }
}
- 结果展示
跳转顶部
从Kafka中读取
-
先打开Linux虚拟机中的zookeeper、Kafka和Flink
-
创建一个主题first:bin/kafka-topics.sh --create --zookeeper a:2181 --replication-factor 3 --partitions 1 --topic first
-
启动控制台生产者,向主题first发送数据:bin/kafka-console-producer.sh --broker-list a:9092 --topic first
-
具体代码如下
package source;import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

import java.util.Properties;

/**
 * Demo job that consumes the Kafka topic {@code first} as a stream of strings
 * and prints every record.
 */
public class SourceKafka01 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka client settings; "a" is the broker host name used throughout the article.
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "a:9092");
        kafkaProps.setProperty("group.id", "consumer-group");
        kafkaProps.setProperty("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaProps.setProperty("value.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaProps.setProperty("auto.offset.reset", "latest");

        // Record payloads are decoded to String by SimpleStringSchema.
        FlinkKafkaConsumer011<String> consumer =
                new FlinkKafkaConsumer011<String>("first", new SimpleStringSchema(), kafkaProps);

        DataStreamSource<String> records = environment.addSource(consumer);
        records.print();

        environment.execute();
    }
}
- 结果展示
- 注意:代码中的a是Linux虚拟机的主机名,必须在Windows的hosts文件中添加该主机名到虚拟机IP的映射(即使在代码中直接使用IP地址也不行!!!)
跳转顶部
自定义读取方式
public static class MySensorSource implements SourceFunction<SenSorReading> {private boolean running = true;@Overridepublic void run(SourceContext<SenSorReading> sourceContext) throws Exception {Random random = new Random();HashMap<String, Double> sensorTempMap = new HashMap<>();for (int i = 0; i < 10; i++) {sensorTempMap.put("sensor_" + (i + 1), 60 + random.nextGaussian() * 20);}while (running) {for (String sensorId : sensorTempMap.keySet()) {Double newtemp = sensorTempMap.get(sensorId) + random.nextGaussian();sensorTempMap.put(sensorId, newtemp);sourceContext.collect(new SenSorReading(sensorId, System.currentTimeMillis(), newtemp));}}Thread.sleep(1000);}@Overridepublic void cancel() {running = false;}}
import beans.SenSorReading;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;import java.util.HashMap;
import java.util.Random;

/**
 * Demo job that attaches a user-defined source and prints everything it emits.
 *
 * <p>NOTE(review): {@code MySensorSource} is not declared inside this listing;
 * presumably it is meant to be a static nested class of this one. Confirm
 * before compiling.
 */
public class SourceUDF01 {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();

        MySensorSource sensorSource = new MySensorSource();
        DataStreamSource<SenSorReading> readings = environment.addSource(sensorSource);
        readings.print();

        environment.execute();
    }
}
- 结果展示
跳转顶部