github項目地址 | 作業項目地址 |
---|---|
結對編程夥伴博客地址 | 作業要求鏈接 |
作業要求地址 | 作業要求地址 |
(1)PSP表格
PSP2.1 | Personal Software Process Stages | 預估耗時(分鐘) | 實際耗時(分鐘) |
---|---|---|---|
Planning | 計劃 | 30 | 30 |
· Estimate | · 估計這個任務需要多少時間 | 30 | 30 |
Development | 開發 | 770 | 720 |
· Analysis | · 需求分析 (包括學習新技術) | 300 | 240 |
· Design Spec | · 生成設計文檔 | 30 | 20 |
· Design Review | · 設計複審 (和同事審覈設計文檔) | 30 | 20 |
· Coding Standard | · 代碼規範 (爲目前的開發制定合適的規範) | 20 | 15 |
· Design | · 具體設計 | 30 | 30 |
· Coding | · 具體編碼 | 300 | 315 |
· Code Review | · 代碼複審 | 30 | 40 |
· Test | · 測試(自我測試,修改代碼,提交修改) | 30 | 40 |
Reporting | 報告 | 150 | 120 |
· Test Report | · 測試報告 | 60 | 45 |
· Size Measurement | · 計算工作量 | 30 | 30 |
· Postmortem & Process Improvement Plan | · 事後總結, 並提出過程改進計劃 | 60 | 45 |
合計 | | 950 | 870 |
(2)代碼規範:
命名規範:1.類名首字母要大寫,使用可以反映類功能的名詞或名詞短語命名類。
2.類成員變量首單詞小寫,變量名前可加_前綴。
3.方法名第一個字符要大寫,且應使用動詞或動詞短語。
4.參數首字符小寫,採用描述性參數名稱。
5.接口名稱要有意義,接口修飾符只能用public和internal。
6.每條語句至少佔一行,過長語句斷爲兩行顯示。
7.語句嵌套不超過3層。
詳細代碼規範見:代碼規範
結對過程:
emmm 真香。
結對編程的照片:
由於二人對C#都並不熟悉,解題思路就是百度,每遇到一個問題就查,邊寫邊學。
參考作業要求咱們將項目目標歸納爲以下幾點:
統計詞頻、字符數、行數
/// <summary>
/// Running totals (characters, words, lines) for text that is fed in one line at a time.
/// </summary>
public class WordCalculate
{
    public long charactersnumber = 0;   // characters seen so far (line terminators are not included)
    public long wordsnumber = 0;        // words seen so far
    public long linesnumber = 0;        // non-empty lines seen so far
    public long phrasenumber = 0;       // phrase counter; not updated anywhere in this class

    /// <summary>
    /// Scans one line, inserts every word it contains into <paramref name="wtrie"/> and
    /// updates the character, word and line totals.
    /// A token is a maximal run of ASCII letters and digits; it counts as a word only when
    /// its first character is a letter. Words are lower-cased before insertion.
    /// </summary>
    /// <param name="dataline">One line of text. A null or empty line is ignored entirely
    /// (it is not counted as a line).</param>
    /// <param name="wtrie">Trie that receives the lower-cased words.</param>
    public void Calculate(string dataline, WordTrie wtrie)
    {
        if (string.IsNullOrEmpty(dataline))
        {
            return;
        }

        int length = dataline.Length;
        int tokenStart = -1;     // index where the current token began, -1 when outside a token
        long wordsInLine = 0;

        // Run one position past the end so that a token touching the end of the line
        // is flushed by the same branch as a token followed by a separator.
        for (int i = 0; i <= length; i++)
        {
            bool insideToken = i < length && IsAsciiLetterOrDigit(dataline[i]);
            if (insideToken)
            {
                if (tokenStart < 0)
                {
                    tokenStart = i;
                }
                continue;
            }

            if (tokenStart < 0)
            {
                continue;
            }

            // Tokens that start with a digit are discarded.
            if (IsAsciiLetter(dataline[tokenStart]))
            {
                // The token holds only ASCII letters/digits, so invariant lower-casing
                // is exactly the A-Z -> a-z mapping.
                string word = dataline.Substring(tokenStart, i - tokenStart).ToLowerInvariant();
                wtrie.Insert(word);
                wordsInLine++;
            }
            tokenStart = -1;
        }

        this.linesnumber++;
        // Add only the words found in THIS line. The trie's running total is cumulative,
        // so adding it on every call would count earlier lines again and again.
        this.wordsnumber += wordsInLine;
        this.charactersnumber += length;
    }

    // True for A-Z and a-z.
    private static bool IsAsciiLetter(char c)
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    // True for A-Z, a-z and 0-9.
    private static bool IsAsciiLetterOrDigit(char c)
    {
        return IsAsciiLetter(c) || (c >= '0' && c <= '9');
    }
}
讀取文件
/// <summary>
/// Reads the file at <c>this.pathIn</c> line by line and passes each line to
/// <c>datanumber.Calculate</c> together with <paramref name="wtrie"/>.
/// </summary>
/// <param name="datanumber">Accumulator that is updated for every line read.</param>
/// <param name="wtrie">Trie handed through to <c>Calculate</c>.</param>
/// <returns>The same <paramref name="datanumber"/> instance. If reading fails part-way,
/// it holds whatever was accumulated before the failure.</returns>
public WordCalculate Input(WordCalculate datanumber, WordTrie wtrie)
{
    try
    {
        // Ask for read access only: the two-argument FileStream constructor requests
        // read/write access, which fails on read-only files even though we never write.
        using (FileStream fs = new FileStream(this.pathIn, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs))
        {
            string dataline;
            while ((dataline = sr.ReadLine()) != null)
            {
                datanumber.Calculate(dataline, wtrie);
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort by design: report and carry on, but do not hide the cause.
        Console.WriteLine("文檔讀取失敗!");
        Console.Error.WriteLine(ex.Message);
    }

    return datanumber;
}
文件寫入
/// <summary>
/// Writes the character/word/line totals and the top <paramref name="n"/> words to the
/// file at <c>this.pathOut</c> (created or overwritten), and echoes the totals and the
/// word list to the console.
/// </summary>
/// <param name="datanumber">Totals to report.</param>
/// <param name="wtrie">Trie whose sorted word list is printed.</param>
/// <param name="n">Maximum number of words to print.</param>
/// <remarks>Exceptions raised while writing are not caught here; they propagate to the caller.</remarks>
public void Output(WordCalculate datanumber, WordTrie wtrie, int n)
{
    using (FileStream fs = new FileStream(this.pathOut, FileMode.Create))
    using (StreamWriter sw = new StreamWriter(fs))
    {
        List<WordTrie.ListUnit> WordList = wtrie.Sort();

        string charactersLine = String.Concat("characters:", datanumber.charactersnumber);
        string wordsLine = String.Concat("words:", datanumber.wordsnumber);
        string linesLine = String.Concat("lines:", datanumber.linesnumber);

        // The file copy carries an extra "\n" after each total (blank separator line).
        sw.WriteLine(String.Concat(charactersLine, "\n"));
        sw.WriteLine(String.Concat(wordsLine, "\n"));
        sw.WriteLine(String.Concat(linesLine, "\n"));
        sw.WriteLine("\n詞頻\t單詞\n");

        Console.WriteLine(charactersLine);
        Console.WriteLine(wordsLine);
        Console.WriteLine(String.Concat(linesLine, "\n"));

        // Bound by the list itself as well: the list holds one entry per DISTINCT word,
        // which can be smaller than both n and the total word count.
        for (int i = 0; i < n && i < datanumber.wordsnumber && i < WordList.Count; i++)
        {
            string entry = WordList[i].Word + ":" + String.Concat(WordList[i].WordNum);
            sw.WriteLine(entry);
            Console.WriteLine(entry);
        }
    }
}
利用Trie樹統計詞頻
(按理說用字典類更好做······ trie統計詞頻的效率會更高一點,但是完全沒有辦法寫詞組,最後還是向字典類低頭···)
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace wordCount
{
    /// <summary>
    /// Trie that counts how often each inserted word occurs and how many inserted
    /// words pass through each prefix.
    /// </summary>
    class WordTrie
    {
        // One trie node.
        private class TrieNode
        {
            public int PrefixNum = 0;   // number of inserted words whose path passes through this node
            public int WordNum = 0;     // number of inserted words that end exactly here
            public Dictionary<char, TrieNode> Sons = new Dictionary<char, TrieNode>();  // children keyed by character
            public bool IsEnd = false;  // set once a word has ended at this node
            public char Val;            // character this node was created for
            public string Word = null;  // the full word ending here, if any

            public TrieNode() { }

            public TrieNode(char val)
            {
                Val = val;
            }
        }

        private TrieNode _Root = new TrieNode();

        /// <summary>Total number of successful <see cref="Insert"/> calls (sum of all word frequencies).</summary>
        public int CountSum
        {
            get { return _Root.PrefixNum; }
        }

        /// <summary>Inserts one occurrence of <paramref name="word"/>. Null or empty input is ignored.</summary>
        public void Insert(string word)
        {
            if (string.IsNullOrEmpty(word))
            {
                return;
            }

            TrieNode node = _Root;
            node.PrefixNum++;

            foreach (char pos in word)
            {
                TrieNode child;
                // Single lookup instead of ContainsKey followed by the indexer.
                if (!node.Sons.TryGetValue(pos, out child))
                {
                    child = new TrieNode(pos);
                    node.Sons[pos] = child;
                }
                child.PrefixNum++;
                node = child;
            }

            node.Word = word;
            node.IsEnd = true;
            node.WordNum++;
        }

        /// <summary>Number of inserted words that start with <paramref name="prefix"/>; -1 for null/empty input.</summary>
        public int PrefixCount(string prefix)
        {
            return GetCount(prefix, false);
        }

        /// <summary>Number of times <paramref name="word"/> was inserted; -1 for null/empty input.</summary>
        public int WordCount(string word)
        {
            return GetCount(word, true);
        }

        // Walks the path spelled by str and returns the word or prefix counter of the
        // node reached; 0 when the path does not exist, -1 for null/empty input.
        private int GetCount(string str, bool isword)
        {
            if (string.IsNullOrEmpty(str))
            {
                return -1;
            }

            TrieNode node = _Root;
            foreach (char pos in str)
            {
                TrieNode child;
                if (!node.Sons.TryGetValue(pos, out child))
                {
                    return 0;
                }
                node = child;
            }

            return isword ? node.WordNum : node.PrefixNum;
        }

        /// <summary>True when <paramref name="word"/> has been inserted at least once.</summary>
        public bool ContainsWord(string word)
        {
            return WordCount(word) > 0;
        }

        /// <summary>One row of the word list: a word and its frequency.</summary>
        public class ListUnit
        {
            public string Word;     // the word
            public int WordNum;     // how often it was inserted
        }

        /// <summary>
        /// Returns every stored word with its frequency, ordered by frequency descending
        /// and, for equal frequencies, by ordinal (code-point) order of the word.
        /// </summary>
        public List<ListUnit> Sort()
        {
            List<ListUnit> WordList = WordPreOrder(_Root, new List<ListUnit>());

            WordList.Sort((a, b) =>
            {
                int byCount = b.WordNum.CompareTo(a.WordNum);
                if (byCount != 0)
                {
                    return byCount;
                }
                // Ordinal comparison keeps the tie-break independent of the current culture.
                return string.CompareOrdinal(a.Word, b.Word);
            });

            return WordList;
        }

        // Collects every word stored at or below node into WordList (depth-first).
        // Uses an explicit stack rather than recursion so that the traversal depth is not
        // limited by the call stack when a very long word has been inserted. The visiting
        // order is irrelevant to callers because Sort() orders the result afterwards.
        private List<ListUnit> WordPreOrder(TrieNode node, List<ListUnit> WordList)
        {
            Stack<TrieNode> pending = new Stack<TrieNode>();
            pending.Push(node);

            while (pending.Count > 0)
            {
                TrieNode current = pending.Pop();
                if (current.PrefixNum == 0)
                {
                    continue;   // nothing was ever inserted through this node
                }

                if (current.WordNum != 0)
                {
                    ListUnit unit = new ListUnit();
                    unit.Word = current.Word;
                    unit.WordNum = current.WordNum;
                    WordList.Add(unit);
                }

                foreach (TrieNode child in current.Sons.Values)
                {
                    pending.Push(child);
                }
            }

            return WordList;
        }
    }
}
關於Trie樹的相關參考以下:字典樹(trie樹)實現詞頻查找
詳見我隊友寫的博客
傳送門