```java
package com.whh.bigdata.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Created by whh on 2018/2/7.
 */
public class HdfsFile {

    /**
     * Append a line of text to a local file, creating the file if needed.
     */
    public static void appendToFile(String sDestFile, String sContent) {
        // String sContent = "I love Ysm";
        // String sDestFile = "F:/work/logParse/autoCreateHql/myWrite.txt";
        File destFile = new File(sDestFile);
        BufferedWriter out = null;
        if (!destFile.exists()) {
            try {
                destFile.createNewFile();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            // Open the stream in append mode so existing content is preserved
            out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sDestFile, true)));
            out.write(sContent);
            out.newLine();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (out != null) {
                    out.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration(); // Hadoop configuration
        FileSystem hdfs = null;                   // HDFS handle
        Path path = new Path("/");

        // List the entries under each first-level directory, with their sizes
        try {
            hdfs = FileSystem.get(new URI("hdfs://BIGDATA-HADOOP-02.whh.net:8022"), conf, "bigdata");
            FileStatus[] listStatus = hdfs.listStatus(path);
            Path[] listPath = FileUtil.stat2Paths(listStatus); // first-level entries under "/"
            for (Path p : listPath) {
                FileStatus[] pStatus = hdfs.listStatus(p);
                Path[] listP = FileUtil.stat2Paths(pStatus);
                for (Path p2 : listP) {
                    String str = p.toString() + "\t" + p2.toString() + "\t"
                            + hdfs.getContentSummary(p2).getLength() + "\t"
                            + hdfs.getContentSummary(p2).getSpaceConsumed();
                    String strre = str.replace("hdfs://BIGDATA-HADOOP-02.whh.net:8022", "");
                    System.out.println(strre);
                    // appendToFile("F:/work/HDFSfilePro.txt", str);
                }
                // String fileNamePath = p.toString(); // file name string with full path
                //
                // // Depends on the cluster's replication setting, e.g. 3 GB here:
                // System.out.println("SIZE OF THE HDFS DIRECTORY : " + hdfs.getContentSummary(p).getSpaceConsumed());
                // // The actual (logical) size, e.g. 1 GB here:
                // System.out.println("SIZE OF THE HDFS DIRECTORY : " + hdfs.getContentSummary(p).getLength());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
}
```
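The commented-out lines at the end of main() point at the difference between getLength() (the logical number of bytes) and getSpaceConsumed() (bytes on disk including all replicas). A minimal standalone sketch of just that comparison; the class name ContentSummaryDemo and the /tmp path are hypothetical, while the cluster URI and user are carried over from the code above:

```java
package com.whh.bigdata.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

/**
 * Minimal sketch: logical size vs. replicated size of one HDFS path.
 * URI, user and path are assumptions; adjust them to your cluster.
 */
public class ContentSummaryDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(
                new URI("hdfs://BIGDATA-HADOOP-02.whh.net:8022"), conf, "bigdata");
        ContentSummary cs = hdfs.getContentSummary(new Path("/tmp")); // hypothetical path
        System.out.println("logical size (getLength): " + cs.getLength());
        System.out.println("on-disk size incl. replicas (getSpaceConsumed): " + cs.getSpaceConsumed());
        hdfs.close();
    }
}
```

On a cluster with the default replication factor of 3, getSpaceConsumed() is typically about three times getLength(), which explains the 3 GB vs. 1 GB example in the comments above.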
The code above has a small bug: as soon as the loop reaches a directory the user has no permission to read, the whole for loop aborts. The loop body needs a try...catch, revised as follows:
```java
for (Path p : listPath) {
    FileStatus[] pStatus = hdfs.listStatus(p);
    Path[] listP = FileUtil.stat2Paths(pStatus);
    String strre = "";
    for (Path p2 : listP) {
        try {
            String str = p.toString() + "\t" + p2.toString() + "\t"
                    + hdfs.getContentSummary(p2).getLength() + "\t"
                    + hdfs.getContentSummary(p2).getSpaceConsumed();
            strre = str.replace("hdfs://BIGDATA-HADOOP-02.whh.net:8022", "");
            System.out.println(strre);
        } catch (AccessControlException e) { // org.apache.hadoop.security.AccessControlException
            // No read permission on this entry: note it and keep iterating
            System.out.println(p.toString() + "\t" + p.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
        appendToFile("F:/work/HDFSfilePro-2018-02-08.txt", strre);
    }
}
```
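The two nested loops above still only cover two directory levels. A hedged recursive sketch of the same idea, assuming the same cluster URI and user as the code above; the class name HdfsWalk is hypothetical:

```java
package com.whh.bigdata.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.AccessControlException;

import java.io.IOException;
import java.net.URI;

/**
 * Recursive variant of the listing: walk the whole tree and skip
 * entries the user cannot read instead of aborting the traversal.
 */
public class HdfsWalk {
    public static void main(String[] args) throws Exception {
        FileSystem hdfs = FileSystem.get(
                new URI("hdfs://BIGDATA-HADOOP-02.whh.net:8022"), new Configuration(), "bigdata");
        walk(hdfs, new Path("/"));
        hdfs.close();
    }

    static void walk(FileSystem hdfs, Path dir) {
        FileStatus[] entries;
        try {
            entries = hdfs.listStatus(dir);
        } catch (AccessControlException e) {
            // No permission to list this directory: note it and back out
            System.out.println(dir + "\t(permission denied, skipped)");
            return;
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        for (FileStatus status : entries) {
            Path p = status.getPath();
            try {
                System.out.println(p + "\t"
                        + hdfs.getContentSummary(p).getLength() + "\t"
                        + hdfs.getContentSummary(p).getSpaceConsumed());
            } catch (IOException e) {
                // Also covers AccessControlException; skip just this entry
                System.out.println(p + "\t(unreadable, skipped)");
            }
            if (status.isDirectory()) {
                walk(hdfs, p); // descend into subdirectories
            }
        }
    }
}
```

Note that AccessControlException extends IOException, so it must be caught first; catching it per directory (or per entry) means one unreadable branch no longer kills the whole walk.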