github地址
PSP2.1 | Personal Software Process Stages | 預估耗時(分鐘) | 實際耗時(分鐘) |
---|---|---|---|
Planning | 計劃 | 10 | 10 |
• Estimate | • 估計這個任務需要多少時間 | 440 | 540 |
Development | • 開發 | 410 | 500 |
• Analysis | • 需求分析 (包括學習新技術) | 120 | 180 |
• Design Spec | • 生成設計文檔 | 10 | 10 |
• Design Review | • 設計複審 | 10 | 10 |
• Coding Standard | • 代碼規範 (爲目前的開發制定合適的規範) | 10 | 10 |
• Design | • 具體設計 | 20 | 20 |
• Coding | • 具體編碼 | 180 | 120 |
• Code Review | • 代碼複審 | 30 | 60 |
• Test | • 測試(自我測試,修改代碼,提交修改) | 30 | 90 |
Reporting | 報告 | 30 | 40 |
• Test Report | • 測試報告 | 10 | 10 |
• Size Measurement | • 計算工作量 | 10 | 10 |
• Postmortem & Process Improvement Plan | • 事後總結, 並提出過程改進計劃 | 10 | 20 |
合計 | | 450 | 550 |
此次的需求是關於讀寫文件的,因此可以大致分爲讀文件,數據處理,寫數據三個模塊。讀寫文件方面,我以爲java自帶的方法就可以完成,因此這一塊的任務就是查找該怎麼用java自帶的方法。數據處理方面,主要需要關注的是單詞數的統計,我的思路是先將單詞全部轉爲小寫,過濾掉長度不足的字符串,並判斷前四位是不是字母,這樣就留下了符合要求的字符串。
代碼有三個類:一個是文件相關的FileUtil類,目前只有通過路徑取得文件的功能;一個是lib類,裏面有主要的實現方法;最後是Main類,用於運行代碼。
import java.io.File;
import java.io.FileNotFoundException;

/**
 * Small helper for turning a path string into a {@link File} handle.
 *
 * @author 031602435 xyq
 * @version 1
 */
public class FileUtil {

    /**
     * Wraps {@code path} in a {@link File} and logs where it points.
     *
     * <p>A missing file is only reported on standard output; the handle is
     * returned either way, so the caller decides how to react.
     *
     * @param path location of the file to open
     * @return a {@code File} for {@code path}, whether or not it exists
     */
    public File getFile(String path) {
        final File target = new File(path);
        final boolean missing = !target.exists();
        if (missing) {
            System.out.println("file not found");
        }
        System.out.println("locate:" + path);
        return target;
    }
}
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Text statistics for the word-count exercise: character, line and word
 * counts of a file, a word-frequency ranking, and a writer that stores the
 * report in {@code result.txt}.
 *
 * <p>A "word" is a token, obtained by splitting a line on
 * {@code , . ? ! '} and the space character, that is at least four
 * characters long and whose first four characters are ASCII letters. Words
 * are compared case-insensitively (they are lower-cased before counting).
 *
 * @author 031602435 xyq
 * @version 1
 */
public class lib {

    /** Charset name used to decode the input file and to encode the report. */
    public static String encoding = "UTF-8";

    /** Token separators; compiled once instead of on every line. */
    private static final Pattern SEPARATORS = Pattern.compile(",|\\.| |\\?|\\!|\\'");

    /** Minimum token length, and the number of leading characters that must be letters. */
    private static final int WORD_PREFIX_LENGTH = 4;

    /** Maximum number of frequency entries written to the report. */
    private static final int MAX_REPORTED_WORDS = 10;

    /**
     * Counts the characters of a file, excluding line terminators.
     *
     * @param file the file to read, decoded with {@link #encoding}
     * @return the sum of the lengths of all lines
     * @throws FileNotFoundException if {@code file} cannot be opened
     * @throws Exception on any other read failure
     */
    public int charCounter(File file) throws Exception, FileNotFoundException {
        BufferedReader reader = openReader(file);
        try {
            int charnum = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                charnum += line.length();
            }
            return charnum;
        } finally {
            reader.close();
        }
    }

    /**
     * Counts the non-empty lines of a file.
     *
     * <p>Only zero-length lines are skipped; a line made of whitespace alone
     * is still counted.
     *
     * @param file the file to read, decoded with {@link #encoding}
     * @return the number of lines whose length is greater than zero
     * @throws FileNotFoundException if {@code file} cannot be opened
     * @throws Exception on any other read failure
     */
    public int lineCounter(File file) throws Exception, FileNotFoundException {
        BufferedReader reader = openReader(file);
        try {
            int linenum = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.isEmpty()) {
                    linenum++;
                }
            }
            return linenum;
        } finally {
            reader.close();
        }
    }

    /**
     * Counts the distinct words of a file.
     *
     * <p>NOTE(review): this is the number of different words, not the total
     * number of word occurrences - confirm that this is what the report's
     * "words" line is meant to show.
     *
     * @param file the file to read, decoded with {@link #encoding}
     * @return the number of distinct (case-insensitive) words
     * @throws FileNotFoundException if {@code file} cannot be opened
     * @throws Exception on any other read failure
     */
    public int wordsCounter(File file) throws Exception, FileNotFoundException {
        return countWords(file).size();
    }

    /**
     * Builds the word-frequency ranking of a file.
     *
     * @param file the file to read, decoded with {@link #encoding}
     * @return one entry per distinct word (lower-cased) with its number of
     *         occurrences, ordered by descending count; words with the same
     *         count are ordered alphabetically so the result is deterministic
     * @throws FileNotFoundException if {@code file} cannot be opened
     * @throws Exception on any other read failure
     */
    public List<Map.Entry<String, Integer>> wordsNumCounter(File file) throws Exception, FileNotFoundException {
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(countWords(file).entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                // Highest count first.
                int byCount = o2.getValue().compareTo(o1.getValue());
                if (byCount != 0) {
                    return byCount;
                }
                // Tie-break on the word itself; HashMap order is arbitrary.
                return o1.getKey().compareTo(o2.getKey());
            }
        });
        return list;
    }

    /**
     * Writes the report to {@code result.txt} in the current working directory
     * ({@code user.dir}), replacing any previous content.
     *
     * <p>Layout, with {@code \r\n} line breaks:
     * <pre>
     * characters:N
     * words:N
     * lines:N
     * &lt;word&gt;:N      (at most ten entries, in list order)
     * </pre>
     * The character total adds one per boundary between counted lines to
     * {@code charcount} as an estimate of the line terminators that
     * {@link #charCounter(File)} leaves out. Each entry is followed by a line
     * break except the tenth.
     *
     * @param charcount  characters excluding line terminators
     * @param linecount  number of non-empty lines
     * @param wordscount number of words
     * @param list       frequency entries, already sorted
     * @throws Exception if the file cannot be written
     */
    public void writeFile(int charcount, int linecount, int wordscount, List<Map.Entry<String, Integer>> list) throws Exception {
        StringBuilder result = new StringBuilder();
        // Clamp so that an empty input reports 0 characters instead of -1.
        result.append("characters:").append(charcount + Math.max(linecount - 1, 0)).append("\r\n");
        result.append("words:").append(wordscount).append("\r\n");
        result.append("lines:").append(linecount).append("\r\n");

        int count = 0;
        for (Map.Entry<String, Integer> entry : list) {
            count++;
            result.append("<").append(entry.getKey()).append(">:").append(entry.getValue());
            if (count >= MAX_REPORTED_WORDS) {
                break;
            }
            result.append("\r\n");
        }

        // Let File join the parts so the platform's separator is used.
        File file = new File(System.getProperty("user.dir"), "result.txt");
        if (!file.exists()) {
            System.out.println("not found result.txt");
            System.out.println("create result.txt");
        }

        // Opening the stream creates (or truncates) the file.
        FileOutputStream out = new FileOutputStream(file);
        try {
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, encoding));
            writer.write(result.toString());
            writer.flush();
        } finally {
            out.close();
        }
    }

    /** Opens {@code file} for line-wise reading, decoded with {@link #encoding}. */
    private static BufferedReader openReader(File file) throws IOException {
        FileInputStream in = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(in, encoding));
        } catch (UnsupportedEncodingException e) {
            // Do not leak the stream when the configured charset name is invalid.
            in.close();
            throw e;
        }
    }

    /** Tallies every word of {@code file}, keyed by its lower-cased form. */
    private static Map<String, Integer> countWords(File file) throws IOException {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        BufferedReader reader = openReader(file);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String token : SEPARATORS.split(line)) {
                    if (!hasLetterPrefix(token)) {
                        continue;
                    }
                    // Locale.ROOT keeps the folding independent of the default locale.
                    String word = token.toLowerCase(Locale.ROOT);
                    Integer seen = counts.get(word);
                    counts.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            reader.close();
        }
        return counts;
    }

    /** Returns whether {@code token} is long enough and starts with four ASCII letters. */
    private static boolean hasLetterPrefix(String token) {
        if (token.length() < WORD_PREFIX_LENGTH) {
            return false;
        }
        for (int i = 0; i < WORD_PREFIX_LENGTH; i++) {
            char c = token.charAt(i);
            boolean letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
            if (!letter) {
                return false;
            }
        }
        return true;
    }
}
import java.io.File; import java.util.List; import java.util.Map; /** * @author 031602435 xyq * @version 1 * */ public class Main { public static void main(String[] args) throws Exception { lib l = new lib(); FileUtil fileUtil = new FileUtil(); //String path = "D:\\java_project\\wordcount_0910\\src\\wordcount_0910\\input.txt"; String path = args[0]; File file = fileUtil.getFile(path); int charcount = l.charCounter(file); int wordscount = l.wordsCounter(file); int linecount = l.lineCounter(file); List<Map.Entry<String, Integer>> list = l.wordsNumCounter(file); l.writeFile(charcount, linecount, wordscount, list); System.out.println("finished"); } }
大概就是把數據處理的接口分開寫了吧。考慮到之後可能有的改進要求,因此幾個數據項的統計功能單獨寫,方便之後修改。
共進行了13個單元測試,其中字符、行數、單詞數各測試三次自己寫的測試文檔和一次助教發在羣中的測試文本;詞頻測試自己寫的和羣文件中的測試文本各一次。
出錯的都是用羣文件中的測試文本進行的測試,大概知道錯誤原因是漏了一些分隔符的判斷,添加了一些分隔符之後結果更接近答案了但還是差一點點,接着會繼續找缺了哪些分隔符吧。。如果老師能直接給出所有具體的分隔符就更好了。。
收穫大概有以下幾點
除了收穫還有其他不足的地方吧,由於之前做Mapreduce測試的時候有用過自帶的Wordcount測試樣例,因此潛意識以爲此次作業花不了多少時間就能完成,致使預留的時間不夠,轉眼就發現快到DDL了。。此次的實踐可以說就是草草地收場,只把代碼寫了出來,至於像代碼優化,單元測試,都沒有做,作業博客也沒有時間認真寫了。還有就是在編碼的過程中還是沒有做到有進展就push到倉庫裏面,到最後寫完才一起push進去。
以後的實踐任務會盡可能留足時間去完成得好一些吧,也希望在接下來的實踐任務中能夠養成良好的編碼習慣。