The Hadoop version used is:
```
$ hadoop version
Hadoop 2.10.1
Subversion https://github.com/apache/hadoop -r 1827467c9a56f133025f28557bfc2c562d78e816
Compiled by centos on 2020-09-14T13:17Z
Compiled with protoc 2.5.0
From source with checksum 3114edef868f1f3824e7d0f68be03650
```
The Maven dependencies are:

```xml
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.10.1</version>
</dependency>
```
A minimal example that writes a local file to HDFS:

```java
package com.definesys.hadoop;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

import java.io.FileInputStream;
import java.io.IOException;

/**
 * @Description: write a local file to HDFS
 * @author: jianfeng.zheng
 * @since: 2020/12/14 12:36 AM
 * @history: 1.2020/12/14 created by jianfeng.zheng
 */
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        // conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);
        Path dst = new Path("hdfs://master:9000/demo/hello.txt");
        // create the target file on HDFS and copy the local file into it
        FSDataOutputStream os = fs.create(dst);
        FileInputStream is = new FileInputStream("/root/hello.txt");
        IOUtils.copy(is, os);
        is.close();
        os.close();
        fs.close();
    }
}
```
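To verify the write, the file can be read back through the same API. The following is a minimal sketch, not part of the original example; the class name `HDFSRead` is hypothetical and it assumes the same `master:9000` cluster and `/demo/hello.txt` path used above:

```java
package com.definesys.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class HDFSRead {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);
        // open the file written above and stream its contents to stdout
        try (FSDataInputStream in = fs.open(new Path("/demo/hello.txt"))) {
            org.apache.hadoop.io.IOUtils.copyBytes(in, System.out, 4096, false);
        }
        fs.close();
    }
}
```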
For a web application, the code is usually packaged as a WAR or EAR; both formats bundle the dependency JARs, so no special handling is needed. To run the program locally as a standalone JAR, two Maven plugins are required. Copy the following configuration into pom.xml:
```xml
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <archive>
                    <manifest>
                        <addClasspath>true</addClasspath>
                        <classpathPrefix>lib/</classpathPrefix>
                        <mainClass>com.definesys.hadoop.HDFS</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <executions>
                <execution>
                    <id>copy-dependencies</id>
                    <phase>package</phase>
                    <goals>
                        <goal>copy-dependencies</goal>
                    </goals>
                    <configuration>
                        <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        <overWriteReleases>false</overWriteReleases>
                        <overWriteSnapshots>false</overWriteSnapshots>
                        <overWriteIfNewer>true</overWriteIfNewer>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
```
maven-jar-plugin generates the MANIFEST.MF file from this configuration. MANIFEST.MF records the main class and the runtime classpath (dependencies), and looks similar to the following:
```
Manifest-Version: 1.0
Archiver-Version: Plexus Archiver
Built-By: asan
Class-Path: lib/hadoop-client-2.10.1.jar ....
Created-By: Apache Maven 3.6.3
Build-Jdk: 1.8.0_161
Main-Class: com.definesys.hadoop.HDFS
```
classpathPrefix sets the directory of the dependency JARs to lib/, and maven-dependency-plugin copies all dependencies into the specified directory, here ${project.build.directory}/lib, which matches classpathPrefix. After packaging, run the program with:
```shell
java -jar hadoop-hdfs-1.0.jar
# or specify the main class manually
java -cp hadoop-hdfs-1.0.jar com.definesys.hadoop.HDFS
```
There is another packaging plugin, maven-assembly-plugin, but it is not recommended here: it unpacks all dependencies and repackages them into a single JAR, and because some Hadoop mechanisms are wired up through SPI, the unpacking can cause those configuration files to overwrite one another.
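As an illustration of the SPI point (a minimal sketch, not from the original article): the HDFS client discovers FileSystem implementations through the JDK ServiceLoader, i.e. the META-INF/services/org.apache.hadoop.fs.FileSystem files shipped in the Hadoop client JARs. The snippet below simply lists what is visible on the classpath; if a fat JAR keeps only one of those service files, some schemes (for example hdfs://) disappear from the list.

```java
import java.util.ServiceLoader;

import org.apache.hadoop.fs.FileSystem;

public class ListRegisteredFileSystems {
    public static void main(String[] args) {
        // Each entry comes from a META-INF/services/org.apache.hadoop.fs.FileSystem file.
        // With a correctly packaged classpath you should see DistributedFileSystem,
        // LocalFileSystem, and others; with a flattened fat jar some may be missing.
        for (FileSystem fs : ServiceLoader.load(FileSystem.class)) {
            System.out.println(fs.getClass().getName());
        }
    }
}
```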
The code can also be organized into a small utility class that wraps the common HDFS operations (mkdir, upload, download, delete):

```java
package com.definesys.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

/**
 * @Description: basic HDFS operations: mkdir, upload, download, delete
 * @author: jianfeng.zheng
 * @since: 2020/12/14 12:36 AM
 * @history: 1.2020/12/14 created by jianfeng.zheng
 */
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        HDFS hdfs = new HDFS();
        hdfs.mkdir("/hdfsDemo");
        hdfs.putFile("/root/hello.txt", "/hdfsDemo");
        hdfs.downloadFile("/hdfsDemo/hello.txt", "/root/hello-hdfs.txt");
        hdfs.deleteFile("/hdfsDemo");
    }

    public boolean mkdir(String path) throws IOException {
        FileSystem fs = this.getHDFSFileSystem();
        return fs.mkdirs(new Path(path));
    }

    public void putFile(String localPath, String hdfsPath) throws IOException {
        this.getHDFSFileSystem().copyFromLocalFile(new Path(localPath), new Path(hdfsPath));
    }

    public void deleteFile(String path) throws IOException {
        // recursive delete
        this.getHDFSFileSystem().delete(new Path(path), true);
    }

    public void downloadFile(String hdfsPath, String localPath) throws IOException {
        this.getHDFSFileSystem().copyToLocalFile(new Path(hdfsPath), new Path(localPath));
    }

    private FileSystem getHDFSFileSystem() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        try {
            return FileSystem.get(conf);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
```
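A hypothetical extension in the same style, not part of the original class: listing the entries under an HDFS path with FileSystem.listStatus. It assumes it is added inside the HDFS class above, with an extra import of org.apache.hadoop.fs.FileStatus.

```java
// Hypothetical helper in the same pattern as the methods above.
public void listDir(String path) throws IOException {
    FileSystem fs = this.getHDFSFileSystem();
    for (FileStatus status : fs.listStatus(new Path(path))) {
        // mark directories with "d" and files with "-", similar to "hadoop fs -ls"
        System.out.println((status.isDirectory() ? "d " : "- ") + status.getPath());
    }
}
```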
If the user running the program has no write permission on the target directory, an error like the following is raised:

```
Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=root, access=WRITE, inode="/":hadoop:supergroup:drwxr-xr-x
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:350)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)
```
HDFS permissions work much like Linux permissions: different users have different rights on each file. If no user name is specified in the code, the operating-system user running the program is used, which here is root. Let's look at the HDFS file permissions:
```
$ hadoop fs -ls /
Found 5 items
drwxr-xr-x   - asan   supergroup          0 2020-12-16 10:07 /001
drwx-w----   - hadoop supergroup          0 2020-12-07 10:54 /tmp
drwxr-xr-x   - hadoop supergroup          0 2020-12-07 11:05 /user
# permissions of the root path
$ hadoop fs -ls -d /
drwxr-xr-x   - hadoop supergroup          0 2020-12-18 00:42 /
```
There are several solutions.
One option is to open up the permissions of the target directory:

```shell
$ hadoop fs -chmod 777 /demo
$ hadoop fs -ls -d /demo
drwxrwxrwx   - hadoop supergroup          0 2020-12-18 00:46 /demo
```
Another option is to disable HDFS permission checking by adding the following configuration (in hdfs-site.xml) on the master node; this turns off permission checks entirely, so it is generally only suitable for test environments:
```xml
<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
```
The last option is to specify the HDFS user in code, which is what the examples above do:

```java
System.setProperty("HADOOP_USER_NAME", "hadoop");
```

This line must be added before any HDFS operation is executed.
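A minimal sketch of the placement, reusing the configuration from the examples above: the property has to be set before the first FileSystem.get call, because the client resolves and caches the user when it is initialized.

```java
// Set the HDFS user first ...
System.setProperty("HADOOP_USER_NAME", "hadoop");

// ... then create the client; the user is resolved at this point.
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000/");
FileSystem fs = FileSystem.get(conf);
```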