某去網的mapreduce面試題

20140510 09:17:19 uuid-01 search-dancheng dep=北京&arr=上海&date=20140529&pnvm=0
20140510 09:18:20 uuid-02 search-wangFan dep=北京&arr=上海&sdate=20140529&edate=20140605
20140510 09:18:23 uuid-01 detail dep=北京&arr=上海&date=20140529&fcode=CA1810
20140510 09:20:29 uuid-02 detail dep=北京&arr=上海&date=20140529&fcode=CA1810
20140510 09:21:19 uuid-01 submit dep=北京&arr=上海&date=20140529&fcode=CA1810&price=1280
20140510 09:23:19 uuid-03 search-dancheng dep=北京&arr=廣州&date=20140529&pnvm=0
20140510 09:25:19 uuid-04 search-dancheng dep=北京&arr=西安&date=20140529&pnvm=0
20140510 09:25:30 uuid-05 search-dancheng dep=北京&arr=天津&date=20140529&pnvm=0
20140510 09:26:29 uuid-04 detail dep=北京&arr=西安&上海&date=20140529&fcode=CA1810
20140510 09:28:19 uuid-06 submit dep=北京&arr=拉薩&date=20140529&fcode=CA1810&price=2260

 

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import beifeng.test.com.bigdata.topN.JobMain;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;

/**
* Created by Administrator on 2015/12/5.
*/
public class OrderOperationJob extends Configured implements Tool {
public static class PairWritable implements WritableComparable<PairWritable> {
public PairWritable() {}
public PairWritable(String first,String second,String third) {
this.first = first;
this.second = second;
this.third=third;
}ui

@Override
public String toString() {
return "PairWritable{" +
"first='" + first + '\'' +
", second='" + second + '\'' +
", third='" + third + '\'' +
'}';
}this

public void set(String first,String second,String third) {
this.first = first;
this.second = second;
this.third = third;
}code

public void setFirst(String first) {
this.first = first;
}orm

public void setSecond(String second) {
this.second = second;
}ip

private String first;

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof PairWritable)) return false;

PairWritable that = (PairWritable) o;

if (!getFirst().equals(that.getFirst())) return false;
if (!getSecond().equals(that.getSecond())) return false;
return getThird().equals(that.getThird());

}

@Override
public int hashCode() {
int result = getFirst().hashCode();
result = 31 * result + getSecond().hashCode();
result = 31 * result + getThird().hashCode();
return result;
}

private String second;
private String third;

public String getThird() {
return third;
}

public void setThird(String third) {
this.third = third;
}

public String getFirst() {
return first;
}
public String getSecond() {
return second;
}

public void readFields(DataInput in) throws IOException {
first = in.readUTF();
second = in.readUTF();
third = in.readUTF();
}

public void write(DataOutput out) throws IOException {
out.writeUTF(first);
out.writeUTF(second);
out.writeUTF(third);
}

public int compareTo(PairWritable o) {
if (!first.equals(o.first) ) {
return first.compareTo(o.first);
}
else if (!second.equals(o.second)) {
return second.compareTo(o.second);
}
else {
return third.compareTo(o.third);
}
}
}

public static class FirstPartitioner extends Partitioner<PairWritable, Text> {
public int getPartition(PairWritable key, Text value,int numPartitions) {
return Math.abs(key.getFirst().hashCode() * 127) % numPartitions;
}
}

public static class GroupingComparator extends WritableComparator {
protected GroupingComparator() {
super(PairWritable.class, true);
}
public int compare(WritableComparable w1, WritableComparable w2) {
PairWritable ip1 = (PairWritable) w1;
PairWritable ip2 = (PairWritable) w2;
String l = ip1.getFirst();
String r = ip2.getFirst();
return l.compareTo(r);
}
}

public static class OrderMapper extends org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, PairWritable, Text> {
private final PairWritable keyPair = new PairWritable();
String[] keyValueArray=null;

protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
keyValueArray = line.split("\t");
if(keyValueArray.length != 5) {
return;
}

keyPair.set(keyValueArray[2],keyValueArray[0], keyValueArray[1]);
context.write(keyPair,new Text(keyValueArray[3]));
}
}

public static class OrderReducer extends Reducer<PairWritable, Text, IntWritable, NullWritable> {
int countSingle;
int countReturn;
protected void setup(Context context) {

}

protected void reduce(PairWritable key, Iterable<Text> values,Context context) throws IOException, InterruptedException {
List<String> list = new LinkedList<String>();
for (Text val : values) {
list.add(val.toString());
}
if(list.size()==3) {
if (list.get(0).startsWith("search-dancheng")
&& list.get(1).startsWith("detail")
&& list.get(2).startsWith("submit")) {
countSingle++;
} else if (list.get(0).startsWith("search-wangfan")
&& list.get(1).startsWith("detail")
&& list.get(2).startsWith("submit")) {
countReturn++;
}
}
}
protected void cleanup(Context context) {
try {
context.write(new IntWritable(countSingle),NullWritable.get());
context.write(new IntWritable(countReturn),NullWritable.get());
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}

/**
 * Configures and runs the job.
 *
 * <p>Any existing output directory is deleted recursively before the job starts.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 when the job succeeds, 1 when it fails, 2 when the arguments are missing
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        // Report bad usage instead of failing with ArrayIndexOutOfBoundsException.
        System.err.println("Usage: " + OrderOperationJob.class.getSimpleName() + " <input path> <output path>");
        ToolRunner.printGenericCommandUsage(System.err);
        return 2;
    }

    Configuration conf = super.getConf();
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);

    Job job = Job.getInstance(conf, OrderOperationJob.class.getSimpleName());
    job.setJarByClass(OrderOperationJob.class);

    job.setMapperClass(OrderMapper.class);
    job.setReducerClass(OrderReducer.class);
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(GroupingComparator.class);

    job.setMapOutputKeyClass(PairWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputPath);
    // Resolve the filesystem from the output path itself so a path on a
    // non-default filesystem is deleted on the filesystem it belongs to.
    outputPath.getFileSystem(conf).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean isSuccess = job.waitForCompletion(true);
    return isSuccess ? 0 : 1;
}

/**
 * Command-line entry point: runs the job through {@link ToolRunner} and exits
 * with the code returned by {@link #run}.
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new OrderOperationJob(), args);
    System.exit(res);
}
}

相關文章
相關標籤/搜索