#!/bin/bash
#
# Counts the lines of a local file with a Hadoop Streaming job.
#
# Usage:   <this script> <local-file>
# Needs:   HADOOP_HOME set; count.sh in the current directory (it is shipped
#          to the cluster via -files and run as the reducer).
# Result:  written by the job under /bigdata/output/test/.

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s <local-file>\n' "${0##*/}" >&2
  exit 2
fi

file_name=$1
if [[ ! -f "$file_name" ]]; then
  printf 'error: no such file: %s\n' "$file_name" >&2
  exit 1
fi

# Fail early with a clear message instead of building a bogus jar path.
: "${HADOOP_HOME:?HADOOP_HOME must be set}"

# "hadoop fs -put" stores the file under its base name, so every HDFS path
# below must use the base name too (matters when a directory is passed in).
hdfs_name=${file_name##*/}
input_dir=/bigdata/input/test
output_dir=/bigdata/output/test

# Remove the previous result directory; the job refuses to run if it exists.
# A failure here is expected on the first run and is deliberately ignored.
hadoop fs -rm -r "${output_dir}/"

# Remove a previously uploaded copy of the input file (ignored if absent).
hadoop fs -rm "${input_dir}/${hdfs_name}"

# Upload the local file to HDFS.
hadoop fs -put "$file_name" "${input_dir}/" || exit 1

# Count lines: every mapper emits its own "wc -l" result and the reducer
# (count.sh) adds those partial counts together.
hadoop jar "${HADOOP_HOME}/contrib/streaming/hadoop-streaming-1.0.1.jar" \
  -files count.sh \
  -input "${input_dir}/${hdfs_name}" \
  -output "${output_dir}/" \
  -mapper 'wc -l' \
  -reducer "sh count.sh"
count.sh (shell):
#!/bin/bash
#
# Reducer: adds up the partial line counts read from stdin (one integer per
# line) and prints the total.
#
# Only POSIX sh constructs are used here because the streaming job starts
# this file as "sh count.sh", which bypasses the shebang.

# Reads integers from stdin, one per line, and prints their sum to stdout.
# Blank lines are skipped; surrounding whitespace is stripped by read.
# Returns 1 (with a message on stderr) if a line is not a plain integer.
sum_counts() {
  total=0
  # "|| [ -n "$line" ]" keeps a final line that has no trailing newline.
  while read -r line || [ -n "$line" ]; do
    case $line in
      '')
        continue
        ;;
      *[!0-9]*)
        # Refuse anything but digits: arithmetic expansion would otherwise
        # evaluate the text as an expression.
        printf 'count.sh: not a number: %s\n' "$line" >&2
        return 1
        ;;
    esac
    total=$((total + line))
  done
  printf '%s\n' "$total"
}

sum_counts
運行成功後，在 HDFS 的輸出目錄下會產生結果文件，可用以下 bash 命令查看：
# Print the reducer's result; the path must match the job's -output
# directory (/bigdata/output/test/).
hadoop fs -cat /bigdata/output/test/part-00000