(1) Understand the role HDFS plays in the Hadoop architecture. (2) Become proficient with the common HDFS shell commands. (3) Become familiar with the common HDFS Java APIs.
Operating system: Linux. Hadoop version: 2.7.3 or later. JDK version: 1.7 or later. Java IDE: IDEA
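All of the Java methods below operate on an org.apache.hadoop.fs.FileSystem handle, which the original text never shows being created. A minimal bootstrap sketch follows, assuming a NameNode at hdfs://localhost:9000 (the address ShowTheContent uses later); the class name HdfsClient is ours, not the author's:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.net.URI;

public class HdfsClient {
    // Returns a client handle for the (assumed) local pseudo-distributed cluster.
    public static FileSystem connect() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // adjust to your cluster
        return FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
    }
}

Each test method below can then be driven from a main method, e.g. test1(HdfsClient.connect(), srcPath, desPath).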
Shell:
hadoop fs -put /User/Binguner/Desktop/test.txt /test
hadoop fs -appendToFile /User/Binguner/Desktop/test.txt /test/test.txt
hadoop fs -copyFromLocal -f /User/Binguner/Desktop/test.txt /input/test.txt
/**
 * Upload a local file to HDFS; if the target already exists, let the user choose to overwrite or append.
 *
 * @param fileSystem HDFS handle
 * @param srcPath    local file path
 * @param desPath    destination path in HDFS
 */
private static void test1(FileSystem fileSystem, Path srcPath, Path desPath) {
    try {
        if (fileSystem.exists(desPath)) {
            System.out.println("Do you want to overwrite the existing file? ( y / n )");
            if (new Scanner(System.in).next().equals("y")) {
                // Overwrite: copyFromLocalFile(delSrc, overwrite, src, dst).
                fileSystem.copyFromLocalFile(false, true, srcPath, desPath);
            } else {
                // Append the local file's bytes to the end of the existing HDFS file.
                FileInputStream inputStream = new FileInputStream(srcPath.toString());
                FSDataOutputStream outputStream = fileSystem.append(desPath);
                byte[] bytes = new byte[1024];
                int read = -1;
                while ((read = inputStream.read(bytes)) > 0) {
                    outputStream.write(bytes, 0, read);
                }
                inputStream.close();
                outputStream.close();
            }
        } else {
            fileSystem.copyFromLocalFile(srcPath, desPath);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Run result:
Original file list in HDFS:
First run:
File list in HDFS:
Second run:
Directory in HDFS now:
Shell:
hadoop fs -copyToLocal /input/test.txt /User/binguner/Desktop/test.txt
/**
 * Download a file from HDFS; if a local file with the same name exists, save it under a new name.
 *
 * @param fileSystem HDFS handle
 * @param remotePath path of the file in HDFS
 * @param localPath  local path to save the file
 */
private static void test2(FileSystem fileSystem, Path remotePath, Path localPath) {
    try {
        if (fileSystem.exists(remotePath)) {
            fileSystem.copyToLocalFile(remotePath, localPath);
        } else {
            System.out.println("Can't find this file in HDFS!");
        }
    } catch (FileAlreadyExistsException e) {
        // The local file already exists: save under a randomized name instead.
        try {
            System.out.println(localPath.toString());
            fileSystem.copyToLocalFile(remotePath, new Path("src/test" + new Random().nextInt() + ".txt"));
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Local directory before execution:
First run:
Second run:
Shell:
hadoop fs -cat /test/test.txt
/**
 * Print the content of an HDFS file to the terminal.
 *
 * @param fileSystem HDFS handle
 * @param remotePath path of the target file
 */
private static void test3(FileSystem fileSystem, Path remotePath) {
    try {
        FSDataInputStream inputStream = fileSystem.open(remotePath);
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            System.out.println(line);
        }
        bufferedReader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Run result:
Shell:
hadoop fs -ls -h /test/test.txt
/**
 * Display the status (permission, block size, access time, path) of an HDFS file.
 *
 * @param fileSystem HDFS handle
 * @param remotePath path of the target file
 */
private static void test4(FileSystem fileSystem, Path remotePath) {
    try {
        FileStatus[] fileStatus = fileSystem.listStatus(remotePath);
        for (FileStatus status : fileStatus) {
            System.out.println(status.getPermission());
            System.out.println(status.getBlockSize());
            System.out.println(status.getAccessTime());
            System.out.println(status.getPath());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Run result:
Shell:
hadoop fs -ls -R -h /
/**
 * Recursively list all files under an HDFS directory, printing each file's path, permission, length and modification time.
 *
 * @param fileSystem HDFS handle
 * @param remotePath path of the target directory
 */
private static void test5(FileSystem fileSystem, Path remotePath) {
    try {
        // The second argument enables recursive listing.
        RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(remotePath, true);
        while (iterator.hasNext()) {
            FileStatus status = iterator.next();
            System.out.println(status.getPath());
            System.out.println(status.getPermission());
            System.out.println(status.getLen());
            System.out.println(status.getModificationTime());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Run result:
Shell:
hadoop fs -touchz /test/test.txt
hadoop fs -mkdir /test
hadoop fs -rm -R /test/test.txt
/**
 * Create or delete an HDFS file; if its parent directory does not exist, create the directory first.
 *
 * @param fileSystem     HDFS handle
 * @param remoteDirPath  path of the target directory
 * @param remoteFilePath path of the target file
 */
private static void test6(FileSystem fileSystem, Path remoteDirPath, Path remoteFilePath) {
    try {
        if (fileSystem.exists(remoteDirPath)) {
            System.out.println("Please choose your option: 1.create. 2.delete");
            int i = new Scanner(System.in).nextInt();
            switch (i) {
                case 1:
                    // create() returns a stream; close it so the empty file is finalized.
                    fileSystem.create(remoteFilePath).close();
                    break;
                case 2:
                    // Delete the target file (not the whole directory).
                    fileSystem.delete(remoteFilePath, false);
                    break;
            }
        } else {
            // The directory does not exist yet: create it first.
            fileSystem.mkdirs(remoteDirPath);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Before the first run:
First run:
After the first run the directory is created automatically
Second run, choosing to create the file:
Third run, choosing to delete the file:
Shell:
hadoop fs -mkdir /test
hadoop fs -rm -R /test
/**
 * Create or delete an HDFS directory; ask for confirmation before deleting a non-empty directory.
 *
 * @param fileSystem HDFS handle
 * @param remotePath path of the target directory
 */
private static void test7(FileSystem fileSystem, Path remotePath) {
    try {
        if (!fileSystem.exists(remotePath)) {
            System.out.println("Can't find this path, the path will be created automatically");
            fileSystem.mkdirs(remotePath);
            return;
        }
        System.out.println("Do you want to delete this dir? ( y / n )");
        if (new Scanner(System.in).next().equals("y")) {
            FileStatus[] iterator = fileSystem.listStatus(remotePath);
            if (iterator.length != 0) {
                // The directory is not empty: ask for an extra confirmation.
                System.out.println("There are some files in this directory, are you sure you want to delete them all? ( y / n )");
                if (!new Scanner(System.in).next().equals("y")) {
                    return; // abort and keep the non-empty directory
                }
            }
            if (fileSystem.delete(remotePath, true)) {
                System.out.println("Delete successful");
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
HDFS file list before execution:
First run (deleting all files):
File list in HDFS afterwards:
Running again, the directory is created automatically:
Shell:
hadoop fs -get text.txt
cat text.txt >> local.txt
hadoop fs -copyFromLocal -f local.txt text.txt
/**
 * Append a local file's content to the beginning or the end of an HDFS file.
 *
 * @param fileSystem HDFS handle
 * @param remotePath path of the file in HDFS
 * @param localPath  local file path
 */
private static void test8(FileSystem fileSystem, Path remotePath, Path localPath) {
    try {
        if (!fileSystem.exists(remotePath)) {
            System.out.println("Can't find this file");
            return;
        }
        System.out.println("Input 1 to add the content at the start of the remote file, 2 to add it at the end");
        switch (new Scanner(System.in).nextInt()) {
            case 1:
                // Prepend: move the remote file to the local disk, recreate it empty,
                // then write the new content followed by the original content.
                fileSystem.moveToLocalFile(remotePath, localPath);
                FSDataOutputStream fsDataOutputStream = fileSystem.create(remotePath);
                FileInputStream fileInputStream = new FileInputStream("/Users/binguner/IdeaProjects/HadoopDemo/src/test2.txt");
                FileInputStream fileInputStream1 = new FileInputStream("/Users/binguner/IdeaProjects/HadoopDemo/src/test.txt");
                byte[] bytes = new byte[1024];
                int read = -1;
                while ((read = fileInputStream.read(bytes)) > 0) {
                    fsDataOutputStream.write(bytes, 0, read);
                }
                while ((read = fileInputStream1.read(bytes)) > 0) {
                    fsDataOutputStream.write(bytes, 0, read);
                }
                fileInputStream.close();
                fileInputStream1.close();
                fsDataOutputStream.close();
                break;
            case 2:
                // Append: open the remote file in append mode and write the local content at the end.
                FileInputStream inputStream = new FileInputStream("/Users/binguner/IdeaProjects/HadoopDemo/" + localPath.toString());
                FSDataOutputStream outputStream = fileSystem.append(remotePath);
                byte[] bytes1 = new byte[1024];
                int read1 = -1;
                while ((read1 = inputStream.read(bytes1)) > 0) {
                    outputStream.write(bytes1, 0, read1);
                }
                inputStream.close();
                outputStream.close();
                break;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Content of the HDFS file before execution:
First run, prepending the new content to the original file:
Second run, appending the new content to the original file:
Shell:
hadoop fs -rm -R /test/test.txt
/**
 * Delete a file from HDFS.
 *
 * @param fileSystem HDFS handle
 * @param remotePath path of the file in HDFS
 */
private static void test9(FileSystem fileSystem, Path remotePath) {
    try {
        // The second argument requests recursive deletion (needed for directories).
        if (fileSystem.delete(remotePath, true)) {
            System.out.println("Delete success");
        } else {
            System.out.println("Delete failed");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Original directory structure in HDFS:
After the delete operation:
Shell:
hadoop fs -mv /test/test.txt /test2
/**
 * @param fileSystem    HDFS handle
 * @param oldRemotePath old name
 * @param newRemotePath new name
 */
private static void test10(FileSystem fileSystem, Path oldRemotePath, Path newRemotePath) {
    try {
        if (fileSystem.rename(oldRemotePath, newRemotePath)) {
            System.out.println("Rename success");
        } else {
            System.out.println("Rename failed");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Original file name:
After the rename:
Write a class MyFSDataInputStream that extends org.apache.hadoop.fs.FSDataInputStream, with the following requirements: implement readLine(), which reads the specified HDFS file line by line, returning null when the end of the file is reached and otherwise returning one line of text; and implement caching, so that when MyFSDataInputStream reads bytes it first checks the cache, serving the data directly from the cache when present and reading from HDFS otherwise.
import org.apache.hadoop.fs.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

public class MyFSDataInputStream extends FSDataInputStream {
    private static MyFSDataInputStream myFSDataInputStream;
    private static InputStream inputStream;
    private static BufferedReader bufferedReader;

    private MyFSDataInputStream(InputStream in) {
        super(in);
        inputStream = in;
    }

    // Lazily-initialized singleton wrapping the given stream.
    public static MyFSDataInputStream getInstance(InputStream inputStream) {
        if (null == myFSDataInputStream) {
            synchronized (MyFSDataInputStream.class) {
                if (null == myFSDataInputStream) {
                    myFSDataInputStream = new MyFSDataInputStream(inputStream);
                }
            }
        }
        return myFSDataInputStream;
    }

    // Return one line of text, or null at the end of the file. The reader is
    // kept open between calls so the stream position is preserved.
    public static String readLine(FileSystem fileSystem) {
        try {
            if (bufferedReader == null) {
                bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
            }
            return bufferedReader.readLine();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }
}
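The class above satisfies the line-reading requirement but not the caching one. A minimal sketch of the byte-level cache follows; it would be added inside MyFSDataInputStream, and the 4096-byte buffer size is an assumption, not something from the original:

    // Hypothetical read-ahead cache: serve read() from an in-memory buffer,
    // going back to HDFS only when the buffer is exhausted.
    private byte[] cache = new byte[4096]; // assumed cache size
    private int cachePos = 0;              // next unread byte in the cache
    private int cacheLen = 0;              // number of valid bytes in the cache

    @Override
    public int read() throws IOException {
        if (cachePos >= cacheLen) {
            // Cache miss: refill the buffer from HDFS with one bulk read.
            cacheLen = inputStream.read(cache, 0, cache.length);
            cachePos = 0;
            if (cacheLen <= 0) {
                return -1; // end of stream
            }
        }
        return cache[cachePos++] & 0xFF; // cache hit: serve from memory
    }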
Run result:
Use java.net.URL and org.apache.hadoop.fs.FsUrlStreamHandlerFactory to print the text of a specified HDFS file to the terminal.
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.fs.Path;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;

public class ShowTheContent {
    private Path remotePath;
    private FileSystem fileSystem;

    public ShowTheContent(FileSystem fileSystem, Path remotePath) {
        this.fileSystem = fileSystem;
        this.remotePath = remotePath;
    }

    public void show() {
        try {
            // Register the hdfs:// protocol handler so java.net.URL understands HDFS URLs.
            URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
            InputStream inputStream = new URL("hdfs", "localhost", 9000, remotePath.toString()).openStream();
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
            String line = null;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
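A possible invocation, reusing the HdfsClient sketch from the setup section (the file path /test/test.txt is an assumption for illustration):

public static void main(String[] args) throws Exception {
    // Hypothetical entry point; adjust the path to a file that exists in your HDFS.
    FileSystem fileSystem = HdfsClient.connect();
    new ShowTheContent(fileSystem, new Path("/test/test.txt")).show();
}

Note that URL.setURLStreamHandlerFactory may only be called once per JVM, so show() as written would throw an Error if invoked a second time in the same process.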
Output: