Using IDEA on Windows to Connect to a Remote Hadoop Cluster for Development

I. Environment Preparation

1. Download a copy of Hadoop, extract it locally, and configure the HADOOP_HOME environment variable.

When the program runs from IDEA, Hadoop reads this environment variable to locate the bin directory inside it. Hadoop does not actually need to be started locally; having the bin directory there is enough. Without it you will get an error saying the HADOOP_HOME environment variable cannot be found. (An alternative that avoids the environment variable entirely is sketched after step 3 below.)

2. The bin directory is missing winutils.exe and hadoop.dll, which have to be downloaded separately from

https://github.com/steveloughran/winutils

You can also skip downloading the full Hadoop distribution and just download this bin directory, then point the HADOOP_HOME environment variable at the directory one level above it.

3. Copy hadoop.dll into C:\Windows\System32; otherwise the job fails with Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
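
If you would rather not touch system-wide environment variables, a commonly used alternative (my addition, not part of the original steps, and the path below is an assumption) is to set the hadoop.home.dir system property at the very start of the program; the Hadoop client checks this property before falling back to HADOOP_HOME. A minimal sketch:

// Set before any Hadoop class touches the native/shell layer, i.e. at the top of main().
// The path is an assumption -- point it at the directory whose bin folder contains winutils.exe.
public class HadoopHomeSetup {
    public static void main(String[] args) {
        System.setProperty("hadoop.home.dir", "D:\\hadoop-3.1.0");
        // ... then submit the job as usual, e.g. via ToolRunner.run(...)
    }
}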

 

II. Building the Project

  1. Add the Maven dependencies

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>3.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>3.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-common</artifactId>
    <version>3.1.0</version>
</dependency>

  2. Copy WordCount.java from the Hadoop source; it lives in the hadoop-3.1.0-src\hadoop-mapreduce-project\hadoop-mapreduce-client\hadoop-mapreduce-client-jobclient\src\test\java\org\apache\hadoop\mapred directory. My copy has a few small changes.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * This is an example Hadoop Map/Reduce application.
 * It reads the text input files, breaks each line into words
 * and counts them. The output is a locally sorted list of words and the
 * count of how often they occurred.
 *
 * To run: bin/hadoop jar build/hadoop-examples.jar wordcount
 *            [-m <i>maps</i>] [-r <i>reduces</i>] <i>in-dir</i> <i>out-dir</i>
 */
public class WordCount extends Configured implements Tool {

  /**
   * Counts the words in each line.
   * For each line of input, break the line into words and emit them as
   * (<b>word</b>, <b>1</b>).
   */
  public static class MapClass extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      String line = value.toString();
      StringTokenizer itr = new StringTokenizer(line, " \t\n\r\f,.:;?![]'");
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken().toLowerCase());
        output.collect(word, one);
      }
    }
  }

  /**
   * A reducer class that just emits the sum of the input values.
   */
  public static class Reduce extends MapReduceBase
      implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      // Modified from the original example: only emit words that occur more than 4 times.
      if (sum > 4) {
        output.collect(key, new IntWritable(sum));
      }
    }
  }

  static int printUsage() {
    System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  /**
   * The main driver for word count map/reduce program.
   * Invoke this method to submit the map/reduce job.
   * @throws IOException When there is communication problems with the
   *                     job tracker.
   */
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-m".equals(args[i])) {
          conf.setNumMapTasks(Integer.parseInt(args[++i]));
        } else if ("-r".equals(args[i])) {
          conf.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else {
          other_args.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        return printUsage();
      }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
      System.out.println("ERROR: Wrong number of parameters: " +
          other_args.size() + " instead of 2.");
      return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
  }

  public static void main(String[] args) throws Exception {
    // Modified from the original example: input/output paths are hard-coded for this demo.
    int res = ToolRunner.run(new Configuration(), new WordCount(),
        new String[]{"D:\\my.txt", "D:\\out"});
    System.exit(res);
  }
}
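
The hard-coded D:\ paths in main mean the job data lives on the local file system. To point the same job at the remote cluster's HDFS instead, one common approach (a hedged sketch of my own, not from the original post; the NameNode address and paths are assumptions taken from a typical core-site.xml) is to set fs.defaultFS on the Configuration before submitting, replacing the main method above with something like:

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed NameNode address -- use the fs.defaultFS value from the
    // cluster's core-site.xml on the server.
    conf.set("fs.defaultFS", "hdfs://192.168.1.100:9000");
    // Input and output are now paths on the remote HDFS.
    int res = ToolRunner.run(conf, new WordCount(),
        new String[]{"/input/my.txt", "/out"});
    System.exit(res);
  }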

 

Running it may fail with a permission error. On the server, edit etc/hadoop/hdfs-site.xml, set dfs.permissions to false, and restart HDFS (in Hadoop 3 the property's current name is dfs.permissions.enabled; the old name still works as a deprecated alias):

<property>
    <name>dfs.permissions</name>
    <value>false</value>
</property>
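
Disabling permission checking for the whole cluster is a fairly blunt fix. An alternative worth considering (my addition, not from the original post; the user name is a placeholder) is to tell the HDFS client which remote user to act as, so writes happen as a user that owns the target directory:

// Set before the first FileSystem/JobClient is created, e.g. at the top of main().
// "hadoop" is an assumed placeholder -- use the user that owns the HDFS paths you write to.
System.setProperty("HADOOP_USER_NAME", "hadoop");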

OK, now run it.

 

The console shows no errors at all; let's look at the D: drive.

An out folder has been created on D:. Opening it shows four files, two more than a run on the server itself: the extra two are .crc files. Let's look at part-00000 first.

The word-count results are there, so calling the remote Hadoop service from a local IDEA project works. Eclipse should be much the same; most of the tutorials I had found earlier were for Eclipse and seemed to require an extra plugin, whereas today it only took a few files. I wonder whether Hadoop 3 improved its Windows support.

 

I also opened the .crc file just now; its contents look like unreadable binary.

A quick search says these are Hadoop data checksum files.

If you are interested, this blog post goes into more detail on .crc files (I only read the beginning; maybe I am too impatient...):

https://www.cnblogs.com/gpcuster/archive/2011/01/26/1945363.html

 

I am still fumbling my way through Hadoop on my own; if you are interested, let's learn together.
