Hadoop Tutorial: HDFS Client Development

Hadoop environment

The Hadoop version used is 2.10.1:

hadoop version
Hadoop 2.10.1
Subversion https://github.com/apache/hadoop -r 1827467c9a56f133025f28557bfc2c562d78e816
Compiled by centos on 2020-09-14T13:17Z
Compiled with protoc 2.5.0
From source with checksum 3114edef868f1f3824e7d0f68be03650

Client development

  • Add the dependencies (using Maven)
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.10.1</version>
</dependency>

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.10.1</version>
</dependency>

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.10.1</version>
</dependency>
  • Write the code
package com.definesys.hadoop;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

import java.io.FileInputStream;
import java.io.IOException;


/**
 * @Description: upload a local file to HDFS
 * @author: jianfeng.zheng
 * @since: 2020/12/14 12:36 AM
 * @history: 1.2020/12/14 created by jianfeng.zheng
 */
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        Configuration conf = new Configuration();
        // fs.defaultFS points at the NameNode of the target cluster
        conf.set("fs.defaultFS", "hdfs://master:9000/");
//        conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        // Perform the HDFS operations as user "hadoop" instead of the local OS user
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);
        // Upload the local file /root/hello.txt to /demo/hello.txt on HDFS
        Path dst = new Path("hdfs://master:9000/demo/hello.txt");
        FSDataOutputStream os = fs.create(dst);
        FileInputStream is = new FileInputStream("/root/hello.txt");
        IOUtils.copy(is, os);
        is.close();
        os.close();
        fs.close();
    }
    }
}
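
The copy above uses commons-io's IOUtils.copy, which is available because commons-io comes in transitively with the Hadoop dependencies. An equivalent line using Hadoop's own org.apache.hadoop.io.IOUtils (a sketch, not part of the original example) would be:

// copyBytes(in, out, bufferSize, close): with close=true both streams are closed after the copy,
// so the explicit is.close()/os.close() calls become unnecessary
org.apache.hadoop.io.IOUtils.copyBytes(is, os, 4096, true);
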
  • Packaging

If this is a web application it will normally be packaged as a war or an ear; either way, both formats bundle the dependency jars, so no special handling is needed. To run the program locally instead, two plugins are required; copy the following configuration into pom.xml:

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <archive>
                    <manifest>
                        <addClasspath>true</addClasspath>
                        <classpathPrefix>lib/</classpathPrefix>
                        <mainClass>com.definesys.hadoop.HDFS</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>

        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <executions>
                <execution>
                    <id>copy-dependencies</id>
                    <phase>package</phase>
                    <goals>
                        <goal>copy-dependencies</goal>
                    </goals>
                    <configuration>
                        <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        <overWriteReleases>false</overWriteReleases>
                        <overWriteSnapshots>false</overWriteSnapshots>
                        <overWriteIfNewer>true</overWriteIfNewer>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

maven-jar-plugin generates the MANIFEST.MF file from this configuration; MANIFEST.MF records the main class and the dependency classpath, and looks roughly like this:

Manifest-Version: 1.0
Archiver-Version: Plexus Archiver
Built-By: asan
Class-Path: lib/hadoop-client-2.10.1.jar ....
Created-By: Apache Maven 3.6.3
Build-Jdk: 1.8.0_161
Main-Class: com.definesys.hadoop.HDFS

classpathPrefix specifies lib/ as the directory containing the dependency jars, and maven-dependency-plugin copies all dependencies into the configured directory, here ${project.build.directory}/lib, which matches classpathPrefix. Once packaging is done, just run:

java -jar hadoop-hdfs-1.0.jar

# Or specify the main class manually

java -cp hadoop-hdfs-1.0.jar com.definesys.hadoop.HDFS
There is another packaging plugin, maven-assembly-plugin, but it is not recommended here: it unpacks all dependencies into a single jar, and because some Hadoop mechanisms are implemented via SPI, the unpacking causes the service configuration files to overwrite one another.
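
For example, FileSystem implementations are discovered through the JDK ServiceLoader, which reads META-INF/services/org.apache.hadoop.fs.FileSystem from every jar on the classpath; if a fat jar keeps only one of those service files, FileSystem.get typically fails with "No FileSystem for scheme: hdfs" (the commented-out fs.hdfs.impl line in the first example is the usual manual workaround). A minimal sketch to list which implementations are actually visible on the classpath (the class name ListFileSystems is made up for this example):

import java.util.ServiceLoader;

import org.apache.hadoop.fs.FileSystem;

public class ListFileSystems {
    public static void main(String[] args) {
        // Iterating the ServiceLoader instantiates every FileSystem implementation
        // declared in META-INF/services/org.apache.hadoop.fs.FileSystem
        for (FileSystem fs : ServiceLoader.load(FileSystem.class)) {
            System.out.println(fs.getClass().getName());
        }
    }
}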

A simple HDFS operations class

package com.definesys.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.FileInputStream;
import java.io.IOException;


/**
 * @Description: a simple wrapper around common HDFS file operations
 * @author: jianfeng.zheng
 * @since: 2020/12/14 12:36 AM
 * @history: 1.2020/12/14 created by jianfeng.zheng
 */
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        HDFS hdfs = new HDFS();
        // Create a directory, upload a file into it, download it back, then clean up
        hdfs.mkdir("/hdfsDemo");
        hdfs.putFile("/root/hello.txt", "/hdfsDemo");
        hdfs.downloadFile("/hdfsDemo/hello.txt", "/root/hello-hdfs.txt");
        hdfs.deleteFile("/hdfsDemo");
    }
    }

    public boolean mkdir(String path) throws IOException {
        FileSystem fs = this.getHDFSFileSystem();
        return fs.mkdirs(new Path(path));
    }

    public void putFile(String localPath, String hdfsPath) throws IOException {
        this.getHDFSFileSystem().copyFromLocalFile(new Path(localPath), new Path(hdfsPath));
    }

    public void deleteFile(String path) throws IOException {
        this.getHDFSFileSystem().delete(new Path(path), true);
    }

    public void downloadFile(String hdfsPath, String localPath) throws IOException {
        this.getHDFSFileSystem().copyToLocalFile(new Path(hdfsPath), new Path(localPath));
    }

    private FileSystem getHDFSFileSystem() {
        // Build a client configuration pointing at the NameNode; operations run as user "hadoop"
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        try {
            FileSystem fs = FileSystem.get(conf);
            return fs;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
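
The class only moves whole files between the local filesystem and HDFS. If the content of an HDFS file is needed directly in memory, a read method can be added; the following is a minimal sketch (readFile is not part of the original class) built on FileSystem.open and org.apache.hadoop.io.IOUtils:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public String readFile(String hdfsPath) throws IOException {
    FileSystem fs = this.getHDFSFileSystem();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // open() returns a stream over the HDFS file; copyBytes with close=false
    // lets try-with-resources handle closing the input stream
    try (FSDataInputStream in = fs.open(new Path(hdfsPath))) {
        IOUtils.copyBytes(in, out, 4096, false);
    }
    return out.toString("UTF-8");
}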

Problems

Permission issues

Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=root, access=WRITE, inode="/":hadoop:supergroup:drwxr-xr-x
        at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:350)
        at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
        at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)

HDFS file permissions work much like Linux permissions: different users have different rights on a file. If no user name is specified in the code, the operating-system user running the program is used, which here is root. Let's look at the HDFS file permissions:

$ hadoop fs -ls /
Found 5 items
drwxr-xr-x   - asan   supergroup          0 2020-12-16 10:07 /001
drwx-w----   - hadoop supergroup          0 2020-12-07 10:54 /tmp
drwxr-xr-x   - hadoop supergroup          0 2020-12-07 11:05 /user

# Permissions of the root path

$ hadoop fs -ls -d /
drwxr-xr-x   - hadoop supergroup          0 2020-12-18 00:42 /
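
The same ownership and permission information can also be read from code through the FileSystem API; a minimal sketch (reusing the getHDFSFileSystem() helper from the class above):

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

// Prints something like: hadoop supergroup rwxr-xr-x
FileStatus status = getHDFSFileSystem().getFileStatus(new Path("/"));
System.out.println(status.getOwner() + " " + status.getGroup() + " " + status.getPermission());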

There are several solutions:

  • Change the permission of the root path, or of the directory being written to, to 777
$ hadoop fs -chmod 777 /demo

$ hadoop fs -ls -d /demo
drwxrwxrwx   - hadoop supergroup          0 2020-12-18 00:46 /demo
  • Disable permission checking

Add the following configuration to hdfs-site.xml on the master node (a NameNode restart is needed for it to take effect):

<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
  • Set the user name in the code (recommended)
System.setProperty("HADOOP_USER_NAME", "hadoop");
This line must be executed before any HDFS operation, i.e. before FileSystem.get is called.
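
If the global system property is undesirable (an alternative sketch, not from the original), the user can also be passed directly to the FileSystem.get(URI, Configuration, String) overload, which scopes the identity to that one client instance and additionally throws InterruptedException:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

// Inside a method declared "throws IOException, InterruptedException":
Configuration conf = new Configuration();
// The third argument is the user name the operations are performed as (simple authentication)
FileSystem fs = FileSystem.get(URI.create("hdfs://master:9000/"), conf, "hadoop");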