遍歷HDFS目錄,並輸出文件大小

package com.whh.bigdata.test;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.AccessControlException;

/**
 * Created by whh on 2018/2/7.
 */
public class HdfsFile {
    /**
     * Appends one line of text to a local file, creating the file if it does
     * not exist yet ({@link FileOutputStream} in append mode creates it).
     *
     * @param sDestFile path of the local destination file
     * @param sContent  line to append; a platform newline is added after it
     */
    public static void appendToFile(String sDestFile, String sContent) {
        // try-with-resources guarantees the writer is flushed and closed
        // even when write() fails (the original closed it manually in finally).
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(sDestFile, true)))) {
            out.write(sContent);
            out.newLine();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Walks the first two levels of an HDFS namespace and prints, for every
     * second-level entry: parent path, entry path, logical length and actual
     * space consumed (length x replication), with the scheme/authority prefix
     * stripped from the paths.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        Path root = new Path("/");
        // Prefix removed from printed paths so output shows bare HDFS paths.
        final String fsPrefix = "hdfs://BIGDATA-HADOOP-02.whh.net:8022";

        // FileSystem is Closeable: try-with-resources fixes the leak where
        // the original never closed it.
        try (FileSystem hdfs = FileSystem.get(new URI(fsPrefix), conf, "bigdata")) {
            // First level: children of "/".
            Path[] listPath = FileUtil.stat2Paths(hdfs.listStatus(root));
            for (Path p : listPath) {
                // Second level: children of each first-level entry.
                Path[] listP = FileUtil.stat2Paths(hdfs.listStatus(p));
                for (Path p2 : listP) {
                    try {
                        // One getContentSummary() RPC instead of the original
                        // two calls per entry.
                        ContentSummary cs = hdfs.getContentSummary(p2);
                        String line = (p + "\t" + p2 + "\t"
                                + cs.getLength() + "\t"
                                + cs.getSpaceConsumed())
                                .replace(fsPrefix, "");
                        System.out.println(line);
                        // appendToFile("F:/work/HDFSfilePro.txt", line);
                    } catch (AccessControlException e) {
                        // No permission on this entry: report it and keep
                        // iterating instead of aborting the whole walk
                        // (the bug called out in the article below).
                        System.out.println(p + "\t" + p2);
                    }
                }
            }
        } catch (IOException | URISyntaxException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt status so callers can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

}

以上代碼有一個小bug:當執行到沒有權限的目錄時,for循環會退出,需要try...catch一下,修改如下:

for (Path p : listPath) {
    FileStatus[] pStatus = hdfs.listStatus(p);

    Path[] listP = FileUtil.stat2Paths(pStatus);
    for (Path p2 : listP) {
        try {
            String str = p.toString() + "\t" + p2.toString() + "\t"
                    + hdfs.getContentSummary(p2).getLength() + "\t"
                    + hdfs.getContentSummary(p2).getSpaceConsumed();
            String strre = str.replace("hdfs://BIGDATA-HADOOP-02.whh.net:8022", "");
            System.out.println(strre);
            // Append inside the try: previously strre was declared outside the
            // loop, so after an AccessControlException a STALE line from the
            // prior iteration was appended to the file.
            appendToFile("F:/work/HDFSfilePro-2018-02-08.txt", strre);
        } catch (AccessControlException e) {
            // Permission denied on this entry: report parent and child
            // (the original printed p twice) and keep iterating.
            System.out.println(p.toString() + "\t" + p2.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
相關文章
相關標籤/搜索