1. Raw data
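The raw data itself is not reproduced here. The code below assumes each record is a tab-separated line that contains at least a phone number, an upstream byte count, and a downstream byte count, roughly:

phone \t upflow \t downflow

This layout is an assumption; if the real log has extra columns, only the field indices parsed in FlowMap need to change.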
2. Using a Java program
1) Create a new project
2) Import the jars
hadoop-2.7.3\share\hadoop\mapreduce
+ the hdfs jars
+common
3. Write the code
1) The entity class
package com.zy.flow;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class Flow implements Writable {

    private Text phone;
    private LongWritable upflow;
    private LongWritable downflow;
    private LongWritable sumflow;

    // This object is shipped between nodes in the cluster, so it must be serializable.
    // Fields must be deserialized in exactly the same order they were serialized.

    @Override // called during deserialization
    public void readFields(DataInput in) throws IOException {
        phone = new Text(in.readUTF());
        upflow = new LongWritable(in.readLong());
        downflow = new LongWritable(in.readLong());
        sumflow = new LongWritable(in.readLong());
    }

    @Override // called during serialization
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phone.toString());
        out.writeLong(upflow.get());
        out.writeLong(downflow.get());
        out.writeLong(sumflow.get());
    }

    public Text getPhone() { return phone; }
    public void setPhone(Text phone) { this.phone = phone; }
    public LongWritable getUpflow() { return upflow; }
    public void setUpflow(LongWritable upflow) { this.upflow = upflow; }
    public LongWritable getDownflow() { return downflow; }
    public void setDownflow(LongWritable downflow) { this.downflow = downflow; }
    public LongWritable getSumflow() { return sumflow; }
    public void setSumflow(LongWritable sumflow) { this.sumflow = sumflow; }

    public Flow() {
    }

    public Flow(Text phone, LongWritable upflow, LongWritable downflow, LongWritable sumflow) {
        this.phone = phone;
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = sumflow;
    }

    public Flow(LongWritable upflow, LongWritable downflow, LongWritable sumflow) {
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = sumflow;
    }

    @Override
    public String toString() {
        return upflow + "\t" + downflow + "\t" + sumflow;
    }
}
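A quick way to sanity-check the Writable implementation is to round-trip a Flow instance through a byte stream and confirm the fields come back in the same order. This is a minimal sketch, not part of the original project; the phone number and byte counts are made-up sample values.

package com.zy.flow;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class FlowSerializationCheck {
    public static void main(String[] args) throws Exception {
        Flow original = new Flow(new Text("13726230503"),
                new LongWritable(2481), new LongWritable(24681), new LongWritable(27162));

        // Serialize the same way Hadoop does: through write(DataOutput).
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize through readFields(DataInput) and compare the printed fields.
        Flow copy = new Flow();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(original + "  ==  " + copy);
    }
}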
2) The FlowMap class
package com.zy.flow;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowMap extends Mapper<LongWritable, Text, Text, Flow> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // NOTE: the body of this listing was truncated in the original post; the field
        // indices below assume a tab-separated record of the form phone \t upflow \t downflow.
        // Adjust them to match the real log layout.
        String[] fields = value.toString().split("\t");
        Text phone = new Text(fields[0]);
        LongWritable upflow = new LongWritable(Long.parseLong(fields[1]));
        LongWritable downflow = new LongWritable(Long.parseLong(fields[2]));
        LongWritable sumflow = new LongWritable(upflow.get() + downflow.get());
        // Key by phone number; the value carries the per-record flow counts.
        context.write(phone, new Flow(phone, upflow, downflow, sumflow));
    }
}
3) The Part (partitioner) class
package com.zy.flow;

import java.util.HashMap;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class Part extends Partitioner<Text, Flow> {

    // Phone-prefix -> partition lookup table. The original listing was truncated here;
    // the prefixes below are illustrative assumptions: five explicit prefixes plus a
    // default bucket, which gives the six partitions mentioned in the comment below.
    private static HashMap<String, Integer> map = new HashMap<String, Integer>();
    static {
        map.put("135", 0);
        map.put("136", 1);
        map.put("137", 2);
        map.put("138", 3);
        map.put("139", 4);
    }

    @Override
    public int getPartition(Text key, Flow value, int numPartitions) {
        // Look up the first three digits of the phone number; anything not in the
        // table falls into partition 5.
        String prefix = key.toString().substring(0, 3);
        Integer partition = map.get(prefix);
        return partition == null ? 5 : partition;
    }
    // Under this logic the data is split into 6 partitions, so 6 reduce tasks must be
    // configured later.
}
4) The FlowReduce class
package com.zy.flow;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowReduce extends Reducer<Text, Flow, Text, Flow> {
    @Override
    protected void reduce(Text key, Iterable<Flow> values, Context context)
            throws IOException, InterruptedException {
        // NOTE: the body of this listing was truncated in the original post; summing
        // the per-record counts for each phone number is the standard logic here.
        long up = 0;
        long down = 0;
        for (Flow flow : values) {
            up += flow.getUpflow().get();
            down += flow.getDownflow().get();
        }
        Flow total = new Flow(key, new LongWritable(up), new LongWritable(down),
                new LongWritable(up + down));
        // Output: phone number as the key, "upflow \t downflow \t sumflow" as the value
        // (Flow.toString()).
        context.write(key, total);
    }
}
5) The FlowApp class
package com.zy.flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowApp {
    public static void main(String[] args) throws Exception {
        // Create the configuration object
        Configuration configuration = new Configuration();
        // Get a job instance
        Job job = Job.getInstance(configuration);
        // Set the class that identifies the job jar
        job.setJarByClass(FlowApp.class);

        // Set the mapper and its output key/value types
        job.setMapperClass(FlowMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Flow.class);

        // Set the reducer and its (final) output key/value types
        job.setReducerClass(FlowReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flow.class);

        // -----
        // Set the partitioner class
        job.setPartitionerClass(Part.class);
        // Set the number of reduce tasks (must match the 6 partitions produced by Part)
        job.setNumReduceTasks(6);
        // -----

        // Input path (filled in as the first command-line argument at run time)
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // Output path (second argument)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and wait for completion
        job.waitForCompletion(true);
    }
}
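Two points worth noting about the driver: because Part can return partition indices up to 5, setting fewer than 6 reduce tasks makes the map tasks fail with an illegal-partition error, while setting more simply produces extra empty output files. Also, the output directory passed as args[1] must not already exist, otherwise FileOutputFormat rejects the job at submission time.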
4. Run
1) Package the project into a jar
2) Upload the jar to Linux
3) Run it
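A minimal sketch of the run step; the jar name and the HDFS paths are placeholders, and it assumes the input file has already been uploaded to HDFS:

hadoop jar flow.jar com.zy.flow.FlowApp /flow/input /flow/output

The two path arguments are passed through to args[0] and args[1] in FlowApp.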