Working with Hadoop from Java really is a lot more cumbersome than from Python. I tried for a long time without success; today the experiment finally worked, so here are my notes.
1. As a beginner, make sure to import all of the JARs under the common and hdfs directories, otherwise you will hit a long list of missing-dependency errors. The JARs come with the extracted Hadoop distribution. For example, I installed Hadoop 2.8.4 and extracted it to G:\project\hadoop\hadoop-2.8.4\, so every JAR under G:\project\hadoop\hadoop-2.8.4\share\hadoop\common\lib, G:\project\hadoop\hadoop-2.8.4\share\hadoop\common, G:\project\hadoop\hadoop-2.8.4\share\hadoop\hdfs and G:\project\hadoop\hadoop-2.8.4\share\hadoop\hdfs\lib needs to be added to the project's build path.
2. Create a file named log4j.properties in the project root with the following content (otherwise log4j complains that no configuration/appender can be found), and call BasicConfigurator.configure(); in the code:
# Configure logging for testing: optionally with log file
#log4j.rootLogger=debug,appender
log4j.rootLogger=info,appender
#log4j.rootLogger=error,appender
# Output to the console
log4j.appender.appender=org.apache.log4j.ConsoleAppender
# Use TTCCLayout for the output format
log4j.appender.appender.layout=org.apache.log4j.TTCCLayout
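If you prefer not to keep a properties file in the project, roughly the same configuration can be done in code at the start of main() with the log4j 1.x API (a sketch; it replaces both the file above and the plain BasicConfigurator.configure() call):

import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.TTCCLayout;

// Same effect as the properties file: one console appender using TTCCLayout, root level INFO.
Logger root = Logger.getRootLogger();
root.removeAllAppenders();                          // discard appenders configured elsewhere
root.addAppender(new ConsoleAppender(new TTCCLayout()));
root.setLevel(Level.INFO);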
3. The test code is below. Create a new file test.java in the project, and replace hadoop2.com with the value of fs.defaultFS from your core-site.xml:
package WordCount;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.log4j.BasicConfigurator;
public class test {

    public static void main(String[] args) {
        System.out.println("Hello World ");
        BasicConfigurator.configure();
        try {
            listAllFile();
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println("Hello World ");
    }
    /**
     * Get the HDFS file system
     * @return
     * @throws IOException
     * @throws URISyntaxException
     */
    public static FileSystem getFileSystem() throws IOException, URISyntaxException {
        // Read the config files
        Configuration conf = new Configuration();
        // Return the default file system.
        // When running on the Hadoop cluster itself, this is enough to get the default file system:
        // FileSystem fs = FileSystem.get(conf);
        // Address of the target file system
        URI uri = new URI("hdfs://hadoop2.com:9000");
        // Return the specified file system.
        // When testing from a local machine, get the file system this way:
        FileSystem fs = FileSystem.get(uri, conf);
        return fs;
    }
    /**
     * Create a directory
     * @throws Exception
     */
    public static void mkdir() throws Exception {
        // Get the file system
        FileSystem fs = getFileSystem();
        // Create the directory
        fs.mkdirs(new Path("hdfs://hadoop2.com:9000/test/weibo"));
        // Release resources
        fs.close();
    }
    /**
     * Delete a file or directory
     * @throws Exception
     */
    public static void rmdir() throws Exception {
        // Get the file system
        FileSystem fs = getFileSystem();
        // Delete the file or directory (recursively)
        fs.delete(new Path("hdfs://hadoop2.com:9000/test/weibo"), true);
        // Release resources
        fs.close();
    }
    /**
     * List all files under a directory
     * @throws Exception
     */
    public static void listAllFile() throws Exception {
        // Get the file system
        FileSystem fs = getFileSystem();
        // List the directory contents
        FileStatus[] status = fs.listStatus(new Path("hdfs://hadoop2.com:9000/test/"));
        // Get the paths of all files under the directory
        Path[] listedPaths = FileUtil.stat2Paths(status);
        // Print each path
        for (Path path : listedPaths) {
            System.out.println(path);
        }
        // Release resources
        fs.close();
    }
    /**
     * Upload a file to HDFS
     * @throws Exception
     */
    public static void copyToHDFS() throws Exception {
        // Get the file system
        FileSystem fs = getFileSystem();
        // On Linux the source path would be something like: Path srcPath = new Path("/home/hadoop/temp.jar");
        // For testing on Windows, use a Windows path such as E://temp.jar
        Path srcPath = new Path("E://temp.jar");
        // Destination path
        Path dstPath = new Path("hdfs://hadoop2.com:9000/test/weibo");
        // Upload the file
        fs.copyFromLocalFile(srcPath, dstPath);
        // Release resources
        fs.close();
    }
    /**
     * Download a file from HDFS
     * @throws Exception
     */
    public static void getFile() throws Exception {
        // Get the file system
        FileSystem fs = getFileSystem();
        // Source path on HDFS
        Path srcPath = new Path("hdfs://hadoop2.com:9000/test/weibo/temp.jar");
        // Destination path; by default a Linux path.
        // For testing on Windows, use a Windows path such as C://User/andy/Desktop/
        Path dstPath = new Path("D://");
        // Download the file from HDFS
        fs.copyToLocalFile(srcPath, dstPath);
        // Release resources
        fs.close();
    }
    /**
     * Get information about the HDFS cluster nodes
     * @throws Exception
     */
    public static void getHDFSNodes() throws Exception {
        // Get the file system
        FileSystem fs = getFileSystem();
        // Cast to the distributed file system
        DistributedFileSystem hdfs = (DistributedFileSystem) fs;
        // Get all datanodes
        DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
        // Print each datanode
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Name:" + dataNodeStats[i].getHostName());
        }
        // Release resources
        fs.close();
    }
    /**
     * Find where a file's blocks are stored in the HDFS cluster
     * @throws Exception
     */
    public static void getFileLocal() throws Exception {
        // Get the file system
        FileSystem fs = getFileSystem();
        // File path
        Path path = new Path("hdfs://hadoop2.com:9000/test/weibo/temp.jar");
        // Get the file status
        FileStatus fileStatus = fs.getFileStatus(path);
        // Get the list of block locations
        BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        // Print each block's location
        for (int i = 0; i < blockLocations.length; i++) {
            String[] hosts = blockLocations[i].getHosts();
            System.out.println("block_" + i + "_location:" + hosts[0]);
        }
        // Release resources
        fs.close();
    }
}
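The main() method above only exercises listAllFile(). The other helpers can be called the same way; a minimal sketch of a driver (it simply reuses the example paths hard-coded in the methods above):

try {
    test.mkdir();         // create hdfs://hadoop2.com:9000/test/weibo
    test.copyToHDFS();    // upload E://temp.jar into /test/weibo
    test.getFileLocal();  // print which host holds each block of temp.jar
    test.getHDFSNodes();  // list the datanodes in the cluster
    test.getFile();       // download temp.jar back to D://
    test.rmdir();         // delete /test/weibo again
} catch (Exception e) {
    e.printStackTrace();
}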
4. Result:
Hello World 
0 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginSuccess with annotation @org.apache.hadoop.metrics2.annotation.Metric(about=, always=false, sampleName=Ops, type=DEFAULT, value=[Rate of successful kerberos logins and latency (milliseconds)], valueName=Time)
24 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginFailure with annotation @org.apache.hadoop.metrics2.annotation.Metric(about=, always=false, sampleName=Ops, type=DEFAULT, value=[Rate of failed kerberos logins and latency (milliseconds)], valueName=Time)
25 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.getGroups with annotation @org.apache.hadoop.metrics2.annotation.Metric(about=, always=false, sampleName=Ops, type=DEFAULT, value=[GetGroups], valueName=Time)
25 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field private org.apache.hadoop.metrics2.lib.MutableGaugeLong org.apache.hadoop.security.UserGroupInformation$UgiMetrics.renewalFailuresTotal with annotation @org.apache.hadoop.metrics2.annotation.Metric(about=, always=false, sampleName=Ops, type=DEFAULT, value=[Renewal failures since startup], valueName=Time)
25 [main] DEBUG org.apache.hadoop.metrics2.lib.MutableMetricsFactory - field private org.apache.hadoop.metrics2.lib.MutableGaugeInt org.apache.hadoop.security.UserGroupInformation$UgiMetrics.renewalFailures with annotation @org.apache.hadoop.metrics2.annotation.Metric(about=, always=false, sampleName=Ops, type=DEFAULT, value=[Renewal failures since last successful login], valueName=Time)
27 [main] DEBUG org.apache.hadoop.metrics2.impl.MetricsSystemImpl - UgiMetrics, User and group related metrics
117 [main] DEBUG org.apache.hadoop.security.authentication.util.KerberosName - Kerberos krb5 configuration not found, setting default realm to empty
123 [main] DEBUG org.apache.hadoop.security.Groups - Creating new Groups object
129 [main] DEBUG org.apache.hadoop.util.NativeCodeLoader - Trying to load the custom-built native-hadoop library...
152 [main] DEBUG org.apache.hadoop.util.NativeCodeLoader - Loaded the native-hadoop library
153 [main] DEBUG org.apache.hadoop.security.JniBasedUnixGroupsMapping - Using JniBasedUnixGroupsMapping for Group resolution
153 [main] DEBUG org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMapping
202 [main] DEBUG org.apache.hadoop.security.Groups - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback; cacheTimeout=300000; warningDeltaMs=5000
211 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - hadoop login
212 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - hadoop login commit
213 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - Using user: "server" with name server
213 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - User entry: "server"
213 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - Assuming keytab is managed externally since logged in from subject.
214 [main] DEBUG org.apache.hadoop.security.UserGroupInformation - UGI loginUser:server (auth:SIMPLE)
250 [main] DEBUG org.apache.htrace.core.Tracer - sampler.classes = ; loaded no samplers
413 [main] DEBUG org.apache.htrace.core.Tracer - span.receiver.classes = ; loaded no span receivers
841 [main] DEBUG org.apache.hadoop.hdfs.client.impl.DfsClientConf - dfs.client.use.legacy.blockreader.local = false
841 [main] DEBUG org.apache.hadoop.hdfs.client.impl.DfsClientConf - dfs.client.read.shortcircuit = false
841 [main] DEBUG org.apache.hadoop.hdfs.client.impl.DfsClientConf - dfs.client.domain.socket.data.traffic = false
841 [main] DEBUG org.apache.hadoop.hdfs.client.impl.DfsClientConf - dfs.domain.socket.path = 
854 [main] DEBUG org.apache.hadoop.hdfs.DFSClient - Sets dfs.client.block.write.replace-datanode-on-failure.min-replication to 0
890 [main] DEBUG org.apache.hadoop.io.retry.RetryUtils - multipleLinearRandomRetry = null
929 [main] DEBUG org.apache.hadoop.ipc.Server - rpcKind=RPC_PROTOCOL_BUFFER, rpcRequestWrapperClass=class org.apache.hadoop.ipc.ProtobufRpcEngine$RpcProtobufRequest, rpcInvoker=org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker@489115ef
1166 [main] DEBUG org.apache.hadoop.ipc.Client - getting client out of cache: org.apache.hadoop.ipc.Client@b2c9a9c
1784 [main] DEBUG org.apache.hadoop.util.PerformanceAdvisory - Both short-circuit local reads and UNIX domain socket are disabled.
1790 [main] DEBUG org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil - DataTransferProtocol not using SaslPropertiesResolver, no QOP found in configuration for dfs.data.transfer.protection
1849 [main] DEBUG org.apache.hadoop.ipc.Client - The ping interval is 60000 ms.
1860 [main] DEBUG org.apache.hadoop.ipc.Client - Connecting to hadoop2.com/192.168.129.130:9000
1969 [IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server] DEBUG org.apache.hadoop.ipc.Client - IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server: starting, having connections 1
1973 [IPC Parameter Sending Thread #0] DEBUG org.apache.hadoop.ipc.Client - IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server sending #0 org.apache.hadoop.hdfs.protocol.ClientProtocol.getListing
1984 [IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server] DEBUG org.apache.hadoop.ipc.Client - IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server got value #0
1984 [main] DEBUG org.apache.hadoop.ipc.ProtobufRpcEngine - Call: getListing took 170ms
hdfs://hadoop2.com:9000/test/output
hdfs://hadoop2.com:9000/test/start.txt
hdfs://hadoop2.com:9000/test/test.txt
2027 [main] DEBUG org.apache.hadoop.ipc.Client - stopping client from cache: org.apache.hadoop.ipc.Client@b2c9a9c
2028 [main] DEBUG org.apache.hadoop.ipc.Client - removing client from cache: org.apache.hadoop.ipc.Client@b2c9a9c
2028 [main] DEBUG org.apache.hadoop.ipc.Client - stopping actual client because no more references remain: org.apache.hadoop.ipc.Client@b2c9a9c
2028 [main] DEBUG org.apache.hadoop.ipc.Client - Stopping client
2028 [IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server] DEBUG org.apache.hadoop.ipc.Client - IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server: closed
2028 [IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server] DEBUG org.apache.hadoop.ipc.Client - IPC Client (2093010349) connection to hadoop2.com/192.168.129.130:9000 from server: stopped, remaining connections 0
Hello World 
2131 [Thread-2] DEBUG org.apache.hadoop.util.ShutdownHookManager - ShutdownHookManger complete shutdown.
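One detail worth noticing in the log: the client logged in as the local Windows account ("server", auth:SIMPLE). If the HDFS directories are owned by a different user and permission checks fail, one option is to open the file system as an explicit user. A sketch, assuming the FileSystem.get(URI, Configuration, String) overload available in Hadoop 2.x (it additionally throws InterruptedException); the user name "hadoop" below is only a placeholder for whoever owns the target directories:

// Hypothetical variant of getFileSystem(): connect as a named HDFS user instead of the local login.
public static FileSystem getFileSystemAs(String user) throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    URI uri = new URI("hdfs://hadoop2.com:9000");
    return FileSystem.get(uri, conf, user);
}

// Example call: FileSystem fs = getFileSystemAs("hadoop");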