對於org.apache.hadoop.fs.Path來講,
path.getName只是文件名,不包括路徑
path.getParent也只是父文件的文件名,一樣不包括路徑
hdfs.createNewFile(new Path(fileName))
new File(fileName).createNewFile
#本地文件系統在建立filename對應的文件時,若是相關的文件夾不存在,程序會終止,報錯:「java.io.IOException: 沒有那個文件或目錄」
package util
import java.io.{FileSystem => _, _}
import org.apache.hadoop.fs._
import scala.collection.mutable.ListBuffer
// Created by zls on 16-11-24.
/**
  * Helpers around a Hadoop `FileSystem`: type/existence checks, file and folder
  * creation, deletion, recursive listing, and copying inside HDFS or between
  * HDFS and the local file system.
  */
object HDFSHelper {

  /** Size in bytes of the copy buffer used by [[transport]]. */
  private val BufferSize: Int = 64 * 1000

  /** Delegates to `hdfs.isDirectory` for the path named `name`. */
  def isDir(hdfs: FileSystem, name: String): Boolean = isDir(hdfs, new Path(name))

  /** Delegates to `hdfs.isDirectory`. */
  def isDir(hdfs: FileSystem, name: Path): Boolean = hdfs.isDirectory(name)

  /** Delegates to `hdfs.isFile` for the path named `name`. */
  def isFile(hdfs: FileSystem, name: String): Boolean = isFile(hdfs, new Path(name))

  /** Delegates to `hdfs.isFile`. */
  def isFile(hdfs: FileSystem, name: Path): Boolean = hdfs.isFile(name)

  /** Delegates to `hdfs.createNewFile` for the path named `name`. */
  def createFile(hdfs: FileSystem, name: String): Boolean = createFile(hdfs, new Path(name))

  /** Delegates to `hdfs.createNewFile`. */
  def createFile(hdfs: FileSystem, name: Path): Boolean = hdfs.createNewFile(name)

  /** Delegates to `hdfs.mkdirs` for the path named `name`. */
  def createFolder(hdfs: FileSystem, name: String): Boolean = createFolder(hdfs, new Path(name))

  /** Delegates to `hdfs.mkdirs`. */
  def createFolder(hdfs: FileSystem, name: Path): Boolean = hdfs.mkdirs(name)

  /** Delegates to `hdfs.exists` for the path named `name`. */
  def exists(hdfs: FileSystem, name: String): Boolean = exists(hdfs, new Path(name))

  /** Delegates to `hdfs.exists`. */
  def exists(hdfs: FileSystem, name: Path): Boolean = hdfs.exists(name)

  /**
    * Copies every byte readable from `inputStream` to `outputStream` and then
    * flushes the output. Neither stream is closed here; the code that opened
    * them is responsible for closing them.
    */
  def transport(inputStream: InputStream, outputStream: OutputStream): Unit = {
    val buffer = new Array[Byte](BufferSize)
    // Write exactly as many bytes as were read: writing `len - 1` would drop
    // the last byte of every chunk and corrupt the copy.
    Iterator
      .continually(inputStream.read(buffer))
      .takeWhile(_ != -1)
      .foreach(len => outputStream.write(buffer, 0, len))
    outputStream.flush()
  }

  /** Path filter that accepts every path. */
  class MyPathFilter extends PathFilter {
    override def accept(path: Path): Boolean = true
  }

  /** Runs `body` with `resource` and closes the resource afterwards, even on failure. */
  private def withResource[R <: Closeable, A](resource: R)(body: R => A): A =
    try body(resource)
    finally resource.close()

  /** Fully qualified string form of `name`, as reported by `hdfs.getFileStatus`. */
  private def qualifiedName(hdfs: FileSystem, name: String): String =
    hdfs.getFileStatus(new Path(name)).getPath.toString

  /** The part of `child` below `root`, without a leading path separator. */
  private def relativeName(root: String, child: String): String = {
    require(child.startsWith(root), s"$child is not located under $root")
    child.substring(root.length).stripPrefix(Path.SEPARATOR)
  }

  /**
    * create a target file and provide parent folder if necessary
    *
    * @param fullName name of the local file to create
    * @return the local file; it is left untouched when it already exists
    */
  def createLocalFile(fullName: String): File = {
    val target = new File(fullName)
    if (!target.exists) {
      // getParentFile is null when the name has no parent component, in which
      // case there is no folder to provide.
      Option(target.getParentFile).filterNot(_.exists).foreach(_.mkdirs())
      target.createNewFile()
    }
    target
  }

  /**
    * delete file in hdfs
    *
    * @param path name of the file or directory to delete
    * @return true: success, false: failed
    */
  def deleteFile(hdfs: FileSystem, path: String): Boolean = {
    val target = new Path(path)
    // Directories are deleted recursively together with their contents.
    val recursive = isDir(hdfs, target)
    hdfs.delete(target, recursive)
  }

  /**
    * get all file children's full name of a hdfs dir, not include dir children
    *
    * @param fullName the hdfs dir's full name
    * @param holder   buffer the file names are appended to
    * @return `holder`, after the names found below `fullName` were appended
    */
  def listChildren(hdfs: FileSystem, fullName: String, holder: ListBuffer[String]): ListBuffer[String] = {
    hdfs.listStatus(new Path(fullName), new MyPathFilter).foreach { status =>
      val childName = status.getPath.toString
      // The listing already carries the entry type, so no extra lookup is needed.
      if (status.isDirectory) listChildren(hdfs, childName, holder)
      else holder += childName
    }
    holder
  }

  /**
    * Copies the HDFS file `source` to the HDFS file `target`.
    * Both streams are closed when the copy ends, whether it succeeded or not.
    */
  def copyFile(hdfs: FileSystem, source: String, target: String): Unit =
    withResource(hdfs.open(new Path(source))) { in =>
      // create() overwrites an existing file by default, so a separate
      // exists/createFile step beforehand is unnecessary.
      withResource(hdfs.create(new Path(target))) { out =>
        transport(in, out)
      }
    }

  /**
    * Copies every file found below the HDFS folder `sourceFolder` to the same
    * relative location below `targetFolder`.
    */
  def copyFolder(hdfs: FileSystem, sourceFolder: String, targetFolder: String): Unit = {
    // Listed children are fully qualified names, so the source prefix is
    // stripped in its qualified form instead of by regex replacement.
    val sourceRoot = qualifiedName(hdfs, sourceFolder)
    listChildren(hdfs, sourceFolder, new ListBuffer[String]).foreach { child =>
      copyFile(hdfs, child, targetFolder + Path.SEPARATOR + relativeName(sourceRoot, child))
    }
  }

  /**
    * Copies the local file `localSource` to the HDFS file `hdfsTarget`.
    * Both streams are closed when the copy ends, whether it succeeded or not.
    */
  def copyFileFromLocal(hdfs: FileSystem, localSource: String, hdfsTarget: String): Unit =
    withResource(new FileInputStream(localSource)) { in =>
      withResource(hdfs.create(new Path(hdfsTarget))) { out =>
        transport(in, out)
      }
    }

  /**
    * Copies the HDFS file `hdfsSource` to the local file `localTarget`; the
    * local file and its parent folder are created via [[createLocalFile]].
    */
  def copyFileToLocal(hdfs: FileSystem, hdfsSource: String, localTarget: String): Unit = {
    val localFile = createLocalFile(localTarget)
    withResource(hdfs.open(new Path(hdfsSource))) { in =>
      withResource(new FileOutputStream(localFile)) { out =>
        transport(in, out)
      }
    }
  }

  /**
    * Recursively copies the files below the local folder `localSource` to the
    * same relative location below the HDFS folder `hdfsTarget`.
    *
    * @throws IOException when `localSource` cannot be listed
    */
  def copyFolderFromLocal(hdfs: FileSystem, localSource: String, hdfsTarget: String): Unit = {
    // listFiles returns null when the path is not a directory or cannot be read.
    val children = Option(new File(localSource).listFiles)
      .getOrElse(throw new IOException(s"Cannot list local folder: $localSource"))
    children.foreach { child =>
      // Direct children differ from their folder only by their own name, which
      // stays correct for relative sources and trailing separators.
      val targetName = hdfsTarget + Path.SEPARATOR + child.getName
      if (child.isFile) copyFileFromLocal(hdfs, child.getAbsolutePath, targetName)
      else copyFolderFromLocal(hdfs, child.getAbsolutePath, targetName)
    }
  }

  /**
    * Copies every file found below the HDFS folder `hdfsSource` to the same
    * relative location below the local folder `localTarget`.
    */
  def copyFolderToLocal(hdfs: FileSystem, hdfsSource: String, localTarget: String): Unit = {
    val sourceRoot = qualifiedName(hdfs, hdfsSource)
    listChildren(hdfs, hdfsSource, new ListBuffer[String]).foreach { child =>
      copyFileToLocal(hdfs, child, localTarget + File.separator + relativeName(sourceRoot, child))
    }
  }
}
package util
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import scala.collection.mutable.ListBuffer
// Created by zls on 16-11-24.
/**
  * Dispatches a command given as program arguments to the matching
  * `HDFSHelper` operation on the file system of the default `Configuration`.
  */
object HDFSOperator {

  /** Commands understood by [[start]], shown when the arguments are invalid. */
  private val Usage: String =
    """usage:
      |  list <path>
      |  createFile <path>
      |  createFolder <path>
      |  delete <path>
      |  copyfile <source> <target>
      |  copyfolder <source> <target>
      |  copyfilefrom <localSource> <hdfsTarget>
      |  copyfileto <hdfsSource> <localTarget>
      |  copyfolderfrom <localSource> <hdfsTarget>
      |  copyfolderto <hdfsSource> <localTarget>""".stripMargin

  /**
    * Runs the command named by `args(0)` with the paths that follow it.
    * Arguments beyond those a command needs are ignored.
    *
    * @param args command name followed by one or two paths
    * @throws IllegalArgumentException when the command is unknown or paths are missing
    */
  def start(args: Array[String]): Unit = {
    // Lazy, so invalid arguments are rejected before a file system is obtained.
    lazy val hdfs: FileSystem = FileSystem.get(new Configuration)
    args.toList match {
      case "list" :: path :: _                     => traverse(hdfs, path)
      case "createFile" :: path :: _               => HDFSHelper.createFile(hdfs, path)
      case "createFolder" :: path :: _             => HDFSHelper.createFolder(hdfs, path)
      case "copyfile" :: source :: target :: _     => HDFSHelper.copyFile(hdfs, source, target)
      case "copyfolder" :: source :: target :: _   => HDFSHelper.copyFolder(hdfs, source, target)
      case "delete" :: path :: _                   => HDFSHelper.deleteFile(hdfs, path)
      case "copyfilefrom" :: source :: target :: _ => HDFSHelper.copyFileFromLocal(hdfs, source, target)
      case "copyfileto" :: source :: target :: _   => HDFSHelper.copyFileToLocal(hdfs, source, target)
      case "copyfolderfrom" :: source :: target :: _ =>
        HDFSHelper.copyFolderFromLocal(hdfs, source, target)
      case "copyfolderto" :: source :: target :: _ =>
        HDFSHelper.copyFolderToLocal(hdfs, source, target)
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid arguments: ${args.mkString(" ")}\n$Usage")
    }
  }

  /**
    * Prints, for every name returned by `HDFSHelper.listChildren` for
    * `hdfsPath`, the full name and the last component of its path.
    */
  def traverse(hdfs: FileSystem, hdfsPath: String): Unit =
    HDFSHelper.listChildren(hdfs, hdfsPath, new ListBuffer[String]).foreach { path =>
      println(s"--------- path = $path")
      println(s"--------- Path.getname = ${new Path(path).getName}")
    }
}