While writing a Spark program, I needed to use the HDFS API to write files to HDFS, and the following error came up:
```
16/05/03 18:41:50 WARN scheduler.TaskSetManager: Lost task 33.0 in stage 4.0 (TID 396, 192.168.1.14): java.io.IOException: Filesystem closed
    at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:795)
    at org.apache.hadoop.hdfs.DFSOutputStream.flushInternal(DFSOutputStream.java:2123)
    at org.apache.hadoop.hdfs.DFSOutputStream.close(DFSOutputStream.java:2230)
    at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
    at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
    at com.bigdata.HDFSUtil.createFile(HDFSUtil.scala:55)
    at com.bigdata.SparkMain$.partition(SparkMain.scala:169)
    at com.bigdata.SparkMain$$anonfun$10.apply(SparkMain.scala:66)
    at com.bigdata.SparkMain$$anonfun$10.apply(SparkMain.scala:65)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$18.apply(RDD.scala:723)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$18.apply(RDD.scala:723)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
```
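For context, here is a hedged sketch of the kind of write helper involved, assuming a createFile method along the lines of the one named in the stack trace (the body is my reconstruction, not the actual code). The pitfall it illustrates: FileSystem.get returns a cached, JVM-wide instance, so closing it in one place invalidates it for every other user and can surface as exactly this "Filesystem closed" error on a later flush or close.

```scala
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HDFSUtil {
  // Hypothetical reconstruction of a createFile helper like the one in the stack trace.
  def createFile(uri: String, path: String, content: Array[Byte]): Unit = {
    val conf = new Configuration()
    // FileSystem.get returns a cached instance shared across the whole JVM
    // (keyed by scheme/authority/user), not a private handle.
    val fs = FileSystem.get(URI.create(uri), conf)
    val out = fs.create(new Path(path))
    try {
      out.write(content)
    } finally {
      out.close()
    }
    // Do NOT call fs.close() here: that closes the shared cached FileSystem,
    // and any other task (or Spark itself) still holding it will then fail
    // with "java.io.IOException: Filesystem closed" on its next flush/close.
  }
}
```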
For the specific cause and the fix, see my other blog post.