爲了能方便快速地獲取HDFS中的文件,簡單地搭建一個web服務提供下載非常方便快捷,並且在web服務器端不留臨時文件,只做stream中轉,效率相當高!
使用的框架是 SpringMVC + HDFS API。
@Controller
@RequestMapping("/file")
public class FileDownloadController {

    /** HDFS directory all downloadable dump files live under. */
    private static final String BASE_DIR = "/user/app/dump/";

    /**
     * Streams an HDFS file straight to the client as a CSV attachment.
     * No temporary file is created on the web server: bytes are piped from
     * HDFS directly into the servlet response output stream.
     *
     * @param fileName bare file name under {@link #BASE_DIR} (from the URL path)
     * @param request  current HTTP request (unused, kept for signature compatibility)
     * @param response HTTP response the file content is written to
     */
    @RequestMapping(value = "/download/{filename}", method = RequestMethod.GET)
    @ResponseBody
    public void fileDownload(@PathVariable("filename") String fileName,
                             HttpServletRequest request,
                             HttpServletResponse response) {
        // The file name comes from untrusted input and is concatenated into an
        // HDFS path: reject anything that is not a bare file name.
        if (fileName == null || fileName.isEmpty()
                || fileName.contains("..") || fileName.contains("/") || fileName.contains("\\")) {
            response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
            return;
        }
        try {
            response.setContentType("application/octet-stream; charset=utf-8");
            // Encode the attachment name so non-ASCII file names survive the header.
            response.addHeader("Content-Disposition",
                    "attachment; filename=" + URLEncoder.encode(fileName + ".csv", "UTF-8"));
            HdfsUtils.copyFileAsStream(BASE_DIR + fileName, response.getOutputStream());
        } catch (Exception e) {
            // Don't swallow the failure with printStackTrace(): rethrow so the
            // container/Spring reports a 500 instead of a silently truncated file.
            throw new RuntimeException("HDFS download failed for " + fileName, e);
        }
    }
}
public class HdfsUtils {

    /** Shared HDFS handle, created once at class load (opened as user "app"). */
    private static FileSystem hdfs = null;

    static {
        // Register the hdfs:// URL handler so java.net.URL can open HDFS paths.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
        Configuration conf = new Configuration();
        try {
            hdfs = FileSystem.get(URI.create("hdfs://xxxxxxx"), conf, "app");
        } catch (Exception e) {
            // Fail fast: swallowing this would leave hdfs == null and every
            // later call would die with an uninformative NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /** Utility class — no instances. */
    private HdfsUtils() {
    }

    /**
     * Copies the HDFS file at {@code fpath} into {@code out}.
     * The caller owns {@code out} (it is flushed but not closed here);
     * the HDFS input stream is always closed, even if the copy fails.
     *
     * @param fpath absolute HDFS path of the file to read
     * @param out   destination stream, e.g. a servlet response output stream
     */
    public static void copyFileAsStream(String fpath, OutputStream out)
            throws IOException, InterruptedException {
        org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(fpath);
        // try-with-resources: the original leaked fsInput when copyBytes threw.
        try (FSDataInputStream fsInput = hdfs.open(path)) {
            IOUtils.copyBytes(fsInput, out, 4096, false);
        }
        out.flush();
    }
}
是不是很簡單? HDFS的文件流沒有落在web服務器上,而是直接copy到了瀏覽器的OutputStream上。
修改web端的代碼,用zip進行壓縮,默認的壓縮比例約是1:5,大大減少了流在網絡上的傳輸量。
@Controller
@RequestMapping("/file")
public class FileDownloadController {

    /** HDFS directory all downloadable dump files live under. */
    private static final String BASE_DIR = "/user/app/dump/";

    /**
     * Streams an HDFS file to the client as a zip attachment containing a
     * single CSV entry. Zipping on the fly (roughly 1:5 for CSV) cuts the
     * bytes sent over the wire; still no temporary file on the web server.
     *
     * @param fileName bare file name under {@link #BASE_DIR} (from the URL path)
     * @param request  current HTTP request (unused, kept for signature compatibility)
     * @param response HTTP response the zip content is written to
     */
    @RequestMapping(value = "/download/zip/{filename}", method = RequestMethod.GET)
    @ResponseBody
    public void hdfsDownload2(@PathVariable("filename") String fileName,
                              HttpServletRequest request,
                              HttpServletResponse response) {
        // Untrusted input is concatenated into an HDFS path: allow bare names only.
        if (fileName == null || fileName.isEmpty()
                || fileName.contains("..") || fileName.contains("/") || fileName.contains("\\")) {
            response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
            return;
        }
        try {
            response.setContentType("application/octet-stream; charset=utf-8");
            response.setHeader("Content-Disposition",
                    "attachment; filename=" + URLEncoder.encode(fileName + ".zip", "UTF-8"));
            // try-with-resources replaces the original null-prone inner try/catch:
            // if construction or putNextEntry failed there, zipOut stayed null and
            // the copy call below threw a NullPointerException; the zip stream was
            // also never closed on failure.
            try (ZipOutputStream zipOut =
                     new ZipOutputStream(new BufferedOutputStream(response.getOutputStream()))) {
                zipOut.putNextEntry(new ZipEntry(fileName + ".csv"));
                HdfsUtils.copyFileAsStream(BASE_DIR + fileName, zipOut);
                zipOut.closeEntry(); // finish the entry before the stream is closed
            }
        } catch (Exception e) {
            // Rethrow instead of printStackTrace(): a swallowed error here would
            // hand the browser a truncated, corrupt zip with a 200 status.
            throw new RuntimeException("HDFS zip download failed for " + fileName, e);
        }
    }
}
<!-- Version properties shared across the dependency list. -->
<properties>
    <spring.version>4.2.5.RELEASE</spring.version>
    <hadoop.version>2.7.0</hadoop.version>
</properties>

<dependencies>
    <!-- Spring MVC stack for the download controller. -->
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-web</artifactId>
        <version>${spring.version}</version>
    </dependency>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-webmvc</artifactId>
        <version>${spring.version}</version>
    </dependency>

    <!-- Hadoop client + common for FileSystem / FSDataInputStream / IOUtils. -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>