My earlier post, "Hadoop Learning Notes: Installation", turned out to be more detailed than necessary. What I really want from these notes is to record my own observations and lessons learned so I can review them later, so there is no need to write everything out exhaustively.
slaves.sh uptime | sort: check the uptime and related information of every Hadoop datanode.
slaves.sh jps | sort: check which Java processes are running on each datanode.
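(As far as I can tell, slaves.sh just logs into every host listed in conf/slaves over ssh and runs the given command there, so any diagnostic one-liner can be broadcast to the whole cluster this way.)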
When developing Hadoop programs on Windows, many people like to install an Eclipse plugin for remote debugging. That is not actually necessary: Ant can do the same job. Package the program into a jar with Ant, then use scp to push the jar to the Hadoop cluster and run it there. For this, Ant needs the third-party jsch jar before the scp and sshexec tasks can be used in build.xml:
<target name="scp" depends="jar" description="將項目jar文件經過ssh拷貝到遠程服務器指定目錄下">ssh
<scp file="${product}/${jar}" todir="root:liuweicai@master:~" trust="true" />ide
</target>oop
<target name="sshexec" depends="scp" description="經過ssh遠程執行hadoop任務" >學習
<sshexec host="master" username="root" password="liuweicai" command="hadoop jar ${jar} -jt master:9001 /cache /output " trust="true"/>
</target>
這樣就能經過ssh協議遠程運行hadoop的job了!
When writing the driver for a Hadoop job, it is recommended to extend Configured and implement the Tool interface, as in the following reference code:
package com.ims.hadoop.commentwordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class CommentWordCountDriver extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        // ToolRunner parses the generic options (-D, -fs, -jt, ...) into the
        // Configuration before run() is invoked.
        int res = ToolRunner.run(new CommentWordCountDriver(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        if (args.length != 2) {
            System.err.printf("Usage: %s [generic options] <input> <output>\n",
                    getClass().getSimpleName());
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        // Remove a pre-existing output directory, otherwise the job will fail.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out))
            fs.delete(out, true);

        // Gzip-compress the job output (old-style mapred.* property names;
        // set them before the Job copies the Configuration).
        conf.setBoolean("mapred.output.compress", true);
        conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);

        Job job = new Job(conf, "StackOverFlow Comment Word Count");
        job.setJarByClass(CommentWordCountDriver.class); // lets Hadoop locate the jar containing this driver
        job.setMapperClass(WordCountMapper.class);       // mapper defined elsewhere in the package
        job.setCombinerClass(IntSumReducer.class);       // reducer doubles as the combiner
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
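The driver references WordCountMapper and IntSumReducer, which are not shown above. As a minimal sketch (assuming a plain whitespace-tokenized word count; the class names come from the driver, the bodies are my own guess), they might look like this:

package com.ims.hadoop.commentwordcount;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Emits (word, 1) for every token of each input line. The input key is
// LongWritable because the driver uses the default TextInputFormat.
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}

// Sums the counts for each word; input and output types match, so it can
// serve as both combiner and reducer. In a real project each class would
// live in its own source file.
class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values)
            sum += val.get();
        result.set(sum);
        context.write(key, result);
    }
}

Because the driver goes through ToolRunner, the generic options are parsed for free: something like hadoop jar wordcount.jar -D mapred.reduce.tasks=4 /cache /output (jar name illustrative, main class assumed to be in the manifest as in the sshexec target above) would change the reducer count without recompiling anything.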