I. Using the Hadoop Eclipse plugin
1. Download: hadoop-eclipse-plugin-1.2.1
2. Copy it into Eclipse (the plugins directory) and restart Eclipse.
Configure the following:
Check that the files are shown correctly.
3. Creating and deleting files and folders
On Windows, if the interface above does not appear, refer to the Hadoop error-collection notes on the permission problem (adding the missing permissions).
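If the plugin view is not available, the same create/delete operations can also be done from Java code. The snippet below is only a sketch, not part of the original post: the class name HdfsFileDemo and the file path are made up for illustration, and the NameNode address hdfs://node1:9000 is assumed from the JobRunForWin7 configuration shown later.

package com.bjsxt.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative sketch: create and delete HDFS files/directories with the HDFS Java API.
public class HdfsFileDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1:9000");     // assumed NameNode address
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("/usr/input/wc"));                 // create a directory
        fs.delete(new Path("/usr/input/wc/old.txt"), false);  // delete a single file (hypothetical path)
        fs.close();
    }
}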
4. Viewing Hadoop files from the command line
hadoop fs -ls /                            // list all files and directories under the root
hadoop fs -cat /usr/input/wc/test.txt      // view the contents of the file
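For reference, the input directory used above could have been created and populated with commands like these (a sketch; the local ./test.txt path is an assumption):

hadoop fs -mkdir /usr/input/wc             // create the input directory
hadoop fs -put ./test.txt /usr/input/wc/   // upload the local test file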
II. Creating a new Hadoop project
1. The corresponding JARs are imported automatically; their paths come from the Hadoop installation directory you configured (see the figure above).
2. Write the programs:
WcMapper.java
package com.bjsxt.mr;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // (see video 23, 9:31)
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input record is one line of text; split it into words.
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line);
        while (st.hasMoreTokens()) {
            String word = st.nextToken();
            context.write(new Text(word), new IntWritable(1)); // map output: (word, 1)
        }
    }
}

WcReducer.java
package com.bjsxt.mr;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WcReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum up all the 1s emitted by the mappers for this word.
        int sum = 0;
        for (IntWritable i : values) {
            sum += i.get();
        }
        context.write(key, new IntWritable(sum)); // reduce output: (word, count)
    }
}

JobRun.java
package com.bjsxt.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobRun {

    public static void main(String[] args) {
        System.out.println("job run starting");
        Configuration conf = new Configuration();
        conf.set("mapred.job.tracker", "node1:9001");      // JobTracker address
        try {
            Job job = new Job(conf);
            job.setJarByClass(JobRun.class);
            job.setMapperClass(WcMapper.class);
            job.setReducerClass(WcReducer.class);
            job.setMapOutputKeyClass(Text.class);          // map output key type
            job.setMapOutputValueClass(IntWritable.class); // map output value type
            // job.setNumReduceTasks(tasks);               // optionally set the number of reduce tasks
            FileInputFormat.addInputPath(job, new Path("/usr/input/wc/"));
            FileOutputFormat.setOutputPath(job, new Path("/usr/output/wc"));
            // waitForCompletion returns true on success; exit with the corresponding code.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Export the project to wc.jar and upload it to the node1 machine.
Run the command: hadoop jar /root/wc.jar com.bjsxt.mr.JobRun
Result:
[root@node1 ~]# hadoop jar /root/wc.jar com.bjsxt.mr.JobRun
job run starting
16/11/22 15:15:07 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
16/11/22 15:15:07 INFO input.FileInputFormat: Total input paths to process : 1
16/11/22 15:15:07 INFO util.NativeCodeLoader: Loaded the native-hadoop library
16/11/22 15:15:07 WARN snappy.LoadSnappy: Snappy native library not loaded
16/11/22 15:15:07 INFO mapred.JobClient: Running job: job_201611221047_0007
16/11/22 15:15:08 INFO mapred.JobClient: map 0% reduce 0%
16/11/22 15:15:12 INFO mapred.JobClient: map 100% reduce 0%
16/11/22 15:15:20 INFO mapred.JobClient: map 100% reduce 33%
16/11/22 15:15:21 INFO mapred.JobClient: map 100% reduce 100%
16/11/22 15:15:22 INFO mapred.JobClient: Job complete: job_201611221047_0007
16/11/22 15:15:22 INFO mapred.JobClient: Counters: 29
16/11/22 15:15:22 INFO mapred.JobClient: Job Counters
16/11/22 15:15:22 INFO mapred.JobClient: Launched reduce tasks=1
16/11/22 15:15:22 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=4220
16/11/22 15:15:22 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
16/11/22 15:15:22 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
16/11/22 15:15:22 INFO mapred.JobClient: Launched map tasks=1
16/11/22 15:15:22 INFO mapred.JobClient: Data-local map tasks=1
16/11/22 15:15:22 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=9213
16/11/22 15:15:22 INFO mapred.JobClient: File Output Format Counters
16/11/22 15:15:22 INFO mapred.JobClient: Bytes Written=100
16/11/22 15:15:22 INFO mapred.JobClient: FileSystemCounters
16/11/22 15:15:22 INFO mapred.JobClient: FILE_BYTES_READ=202
16/11/22 15:15:22 INFO mapred.JobClient: HDFS_BYTES_READ=213
16/11/22 15:15:22 INFO mapred.JobClient: FILE_BYTES_WRITTEN=108062
16/11/22 15:15:22 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=100
16/11/22 15:15:22 INFO mapred.JobClient: File Input Format Counters
16/11/22 15:15:22 INFO mapred.JobClient: Bytes Read=109
16/11/22 15:15:22 INFO mapred.JobClient: Map-Reduce Framework
16/11/22 15:15:22 INFO mapred.JobClient: Map output materialized bytes=202
16/11/22 15:15:22 INFO mapred.JobClient: Map input records=6
16/11/22 15:15:22 INFO mapred.JobClient: Reduce shuffle bytes=202
16/11/22 15:15:22 INFO mapred.JobClient: Spilled Records=34
16/11/22 15:15:22 INFO mapred.JobClient: Map output bytes=162
16/11/22 15:15:22 INFO mapred.JobClient: Total committed heap usage (bytes)=175706112
16/11/22 15:15:22 INFO mapred.JobClient: CPU time spent (ms)=1820
16/11/22 15:15:22 INFO mapred.JobClient: Combine input records=0
16/11/22 15:15:22 INFO mapred.JobClient: SPLIT_RAW_BYTES=104
16/11/22 15:15:22 INFO mapred.JobClient: Reduce input records=17
16/11/22 15:15:22 INFO mapred.JobClient: Reduce input groups=13
16/11/22 15:15:22 INFO mapred.JobClient: Combine output records=0
16/11/22 15:15:22 INFO mapred.JobClient: Physical memory (bytes) snapshot=258162688
16/11/22 15:15:22 INFO mapred.JobClient: Reduce output records=13
16/11/22 15:15:22 INFO mapred.JobClient: Virtual memory (bytes) snapshot=1467170816
16/11/22 15:15:22 INFO mapred.JobClient: Map output records=17
[root@node1 ~]#
Hadoop file info:
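The file listing screenshot is not reproduced here. The job output can be checked from the command line, for example (assuming the default part-r-00000 name for the single reduce task's output):

hadoop fs -ls /usr/output/wc                 // list the output directory
hadoop fs -cat /usr/output/wc/part-r-00000   // view the word counts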
III. Running the job locally from Windows
1. Build the wc.jar above.
2. Modify the main program:
JobRunForWin7.java
package com.bjsxt.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobRunForWin7 {

    public static void main(String[] args) {
        // Run the Hadoop job from Windows for local testing.
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1:9000");  // NameNode address
        conf.set("mapred.jar", "D:\\wc.jar");               // job jar exported to the local disk
        conf.set("mapred.job.tracker", "node1:9001");       // JobTracker address
        try {
            Job job = new Job(conf);
            job.setJarByClass(JobRun.class);
            job.setMapperClass(WcMapper.class);
            job.setReducerClass(WcReducer.class);
            job.setMapOutputKeyClass(Text.class);          // map output key type
            job.setMapOutputValueClass(IntWritable.class); // map output value type
            // job.setNumReduceTasks(tasks);               // optionally set the number of reduce tasks
            FileInputFormat.addInputPath(job, new Path("/usr/input/wc/"));
            FileOutputFormat.setOutputPath(job, new Path("/usr/output/wc1")); // new output directory
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Debug result:
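The debug screenshot is likewise not reproduced. Assuming the Windows-launched run completed, its output lands in the separate /usr/output/wc1 directory and can be checked the same way:

hadoop fs -cat /usr/output/wc1/part-r-00000   // view the word counts from the Windows run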