Sample access log data (access2.log) — the goal is to count the number of requests per product id:

niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89912 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89923 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89933 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89933 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89944 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89944 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89912 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89923 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89933 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89933 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
Create a Maven project in IDEA or Eclipse.
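If you prefer to create the project from the command line instead, a minimal sketch using the standard quickstart archetype (the groupId and artifactId here are assumptions, inferred from the package and jar names used later in this post):

mvn archetype:generate -DgroupId=com.niit.mr -DartifactId=packagedemo -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false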
Add the Hadoop dependencies to pom.xml:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-common</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.7.3</version>
</dependency>
Add a log4j.properties file under the resources directory, with the following content:
### Root logger ###
log4j.rootLogger = debug,console,fileAppender

### Console appender ###
log4j.appender.console = org.apache.log4j.ConsoleAppender
log4j.appender.console.Target = System.out
log4j.appender.console.layout = org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern = %d{ABSOLUTE} %5p %c:%L - %m%n

### File appender ###
log4j.appender.fileAppender = org.apache.log4j.FileAppender
log4j.appender.fileAppender.File = logs/logs.log
log4j.appender.fileAppender.Append = false
log4j.appender.fileAppender.Threshold = DEBUG
log4j.appender.fileAppender.layout = org.apache.log4j.PatternLayout
log4j.appender.fileAppender.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
Write the mapper class, LogMapper:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LogMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Matches "=" followed by the id (lowercase letters and digits)
    String pattern = "\\=[0-9a-z]*";
    // Compile the pattern once; it is reused for every record
    Pattern r = Pattern.compile(pattern);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Example record:
        // niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89912 HTTP/1.0" 200 4391 "-" "ApacheBench/2.3" "-"
        String data = value.toString();
        Matcher m = r.matcher(data);
        if (m.find()) {
            String idStr = m.group(0);
            // Strip the leading "=" to get the bare id
            String id = idStr.substring(1);
            context.write(new Text(id), new IntWritable(1));
        }
    }
}
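Before wiring the mapper into a job, the regular expression can be sanity-checked in isolation. A minimal standalone sketch (the class name RegexCheck is made up for illustration; the sample line comes from the log above):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RegexCheck {
    public static void main(String[] args) {
        String line = "niit110,192.168.215.131 - - [28/May/2019:18:11:44 +0800] "
                + "\"GET /shop/detail.html?id=402857036a2831e001kshdksdsdk89912 HTTP/1.0\" "
                + "200 4391 \"-\" \"ApacheBench/2.3\" \"-\"";
        Matcher m = Pattern.compile("\\=[0-9a-z]*").matcher(line);
        if (m.find()) {
            // Prints: 402857036a2831e001kshdksdsdk89912
            System.out.println(m.group(0).substring(1));
        }
    }
}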
Write the reducer class:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class LogReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the counts emitted by the mappers for this id
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
Write the driver class:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class LogJob {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(LogJob.class);

        job.setMapperClass(LogMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(LogReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Local paths for testing; replaced with command-line arguments before submitting to the cluster
        FileInputFormat.setInputPaths(job, new Path("F:\\NIIT\\access2.log"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\NIIT\\logs\\002"));

        boolean completion = job.waitForCompletion(true);
    }
}
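One optional tweak, not in the original code: because LogReducer only sums integers, it can also be registered as a combiner, so partial sums happen on the map side and less data crosses the network during the shuffle:

// Optional: pre-aggregate counts on the map side
job.setCombinerClass(LogReducer.class);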
Run the code locally to verify the results. The expected output is:
402857036a2831e001kshdksdsdk89912 2
402857036a2831e001kshdksdsdk89923 2
402857036a2831e001kshdksdsdk89933 4
402857036a2831e001kshdksdsdk89944 2
Once the local test passes, modify the driver's path-handling code so the paths come from the command-line arguments instead of being hard-coded (the submit command below passes the input path first, then the output path):

FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
Also update any other environment-specific settings in the Job (for this job, only the input and output paths change).
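For reference, a minimal sketch of the reworked main method, assuming the input path arrives as args[0] and the output path as args[1] to match the submit command at the end (the System.exit call is a conventional addition, not in the original):

public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    job.setJarByClass(LogJob.class);

    job.setMapperClass(LogMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(LogReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Paths now come from the command line instead of being hard-coded
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}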
To package the program as a jar, configure the packaging plugin in pom.xml:
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
Build the jar by following the steps shown in the figure.
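Equivalently, the jar can be built from the command line with a standard Maven invocation:

mvn clean package

The assembly plugin configured above runs during the package phase and produces a jar that bundles all dependencies, so nothing extra needs to be installed on the cluster nodes.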
Submit the job to the cluster with the following command:
hadoop jar packagedemo-1.0-SNAPSHOT.jar com.niit.mr.LogJob /datas/access2.log /output/logs/
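Once the job completes, the result file can be inspected directly from HDFS (the output path here matches the one passed on the command line above):

hdfs dfs -cat /output/logs/part-r-00000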
That completes all the steps. Give it a try, and good luck!