addSource可以实现读取第三方数据源的数据。
1. 此处以读取kafka数据为例 (kafka集群安装参考 https://blog.csdn.net/zhuzuwei/article/details/107136796)
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Properties;public class KafkaSouceReview {public static void main(String[] args) throws Exception{// 1.获取flink流计算的运行环境StreamExecutionEnvironment env &#61; StreamExecutionEnvironment.getExecutionEnvironment();//Kafka propsProperties properties &#61; new Properties();//指定Kafka的Broker地址properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.***.***:9092,192.168.***.***:9092,192.168.***.***:9092");//指定组IDproperties.put(ConsumerConfig.GROUP_ID_CONFIG, "kafka_test_group1");//如果没有记录偏移量&#xff0c;第一次从最开始消费properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");// 2.从kafka读取数据FlinkKafkaConsumer kafkaSource &#61; new FlinkKafkaConsumer<>("kafka_test1", new SimpleStringSchema(), properties);DataStreamSource stringDataStreamSource &#61; env.addSource(kafkaSource);//3.调用SinkstringDataStreamSource.print();//4.启动流计算env.execute("KafkaSouceReview");}
}
FlinkKafkaConsumer的构造方法源码如下，第二个参数要传入DeserializationSchema实现类的实例。
/**
 * Creates a consumer for a single topic by wrapping the topic name in a singleton list
 * and delegating to the list-based constructor.
 *
 * @param topic             name of the Kafka topic to read from
 * @param valueDeserializer schema used to turn each record value into an element of type {@code T}
 * @param props             Kafka consumer properties
 */
public FlinkKafkaConsumer(String topic, DeserializationSchema<T> valueDeserializer, Properties props) {
    this(Collections.singletonList(topic), valueDeserializer, props);
}
DeserializationSchema的实现类如下，最常用的就是SimpleStringSchema。
2. 启动zookeeper集群：依次在不同节点执行 ./zkServer.sh start
3. 启动kafka集群：依次在不同节点执行 ./kafka-server-start.sh -daemon /usr/local/kafka_2.11-2.1.1/config/server.properties &
在后台启动kafka
4. 创建topic kafka_test1
5. 启动生产者并写数据到kafka topic（kafka_test1）
6. 启动flink的java程序，成功读取到数据