package com.sohu.hot.vis.servlet;

import org.apache.commons.lang.StringUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a text file and prints the most
 * frequent ones (an interview exercise). Two tokenising strategies are shown:
 * {@link #display(String)} splits lines on whitespace, {@link #display2(String)}
 * scans the file character by character.
 *
 * <p>Originally written 2016-12-28.
 *
 * @author liweihan
 */
public class TestHan {

    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "C:/java.txt";

    /** Maximum number of entries printed by {@link #printResult(Map)}. */
    private static final int TOP_N = 10;

    /** Token separator used by {@link #display(String)}; compiled once instead of per line. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** A token counts as a word in {@link #display(String)} only if it matches this entirely. */
    private static final Pattern WORD = Pattern.compile("\\w+");

    /**
     * Runs both counting strategies on the same file.
     *
     * @param args optional; {@code args[0]} is the file to analyse, otherwise
     *             {@code C:/java.txt} is used
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        display(path);
        display2(path);
    }

    /**
     * Strategy 1: read line by line, lower-case, split on whitespace and count
     * every token that consists solely of {@code \w} characters. Prints the
     * full sorted map, then the most frequent words.
     *
     * <p>Known limitation (kept from the original): a token with punctuation
     * attached, such as {@code "life."} or {@code "don't"}, does not match
     * {@code \w+} and is therefore skipped entirely rather than cleaned up.
     *
     * @param path path of the text file to analyse
     * @throws Exception if the file cannot be opened or read
     */
    public static void display(String path) throws Exception {
        // Word -> number of occurrences; TreeMap keeps the keys alphabetically sorted.
        TreeMap<String, Integer> tm = new TreeMap<String, Integer>();
        BufferedReader br = new BufferedReader(new FileReader(new File(path)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                // Locale.ROOT keeps the case mapping independent of the JVM's default locale.
                String[] tokens = WHITESPACE.split(line.toLowerCase(Locale.ROOT));
                for (String token : tokens) {
                    if (WORD.matcher(token).matches()) {
                        Integer seen = tm.get(token);
                        tm.put(token, seen == null ? 1 : seen + 1);
                    }
                }
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            br.close();
        }
        System.out.println(tm);
        printResult(tm);
    }

    /**
     * Strategy 2: read the file one character at a time, treating every maximal
     * run of ASCII letters as a word. Prints the full sorted map, then the most
     * frequent words.
     *
     * <p>Any non-letter (digits, punctuation, apostrophes, whitespace) ends the
     * current word, so {@code "don't"} is counted as {@code "don"} and {@code "t"}.
     *
     * @param path path of the text file to analyse
     * @throws Exception if the file cannot be opened or read
     */
    public static void display2(String path) throws Exception {
        Map<String, Integer> map = new TreeMap<String, Integer>();
        BufferedReader br = new BufferedReader(new FileReader(new File(path)));
        try {
            StringBuilder word = new StringBuilder();
            int ch;
            while ((ch = br.read()) != -1) {
                if (isCharacter(ch)) {
                    word.append((char) ch);
                } else {
                    flushWord(word, map);
                }
            }
            // The file may end in the middle of a word; do not lose it.
            flushWord(word, map);
        } finally {
            br.close();
        }
        System.out.println(map);
        printResult(map);
    }

    /**
     * Adds the buffered word, if any, to {@code map} and empties the buffer.
     * An empty buffer (consecutive separators) is ignored so that no
     * empty-string "word" is ever counted.
     */
    private static void flushWord(StringBuilder word, Map<String, Integer> map) {
        if (word.length() > 0) {
            Addword(word.toString(), map);
            word.setLength(0);
        }
    }

    /**
     * Tells whether a character code is an ASCII letter.
     *
     * @param tmpchar character code to test
     * @return {@code true} for {@code 'A'..'Z'} and {@code 'a'..'z'}, {@code false} otherwise
     */
    public static boolean isCharacter(int tmpchar) {
        return (tmpchar >= 'A' && tmpchar <= 'Z') || (tmpchar >= 'a' && tmpchar <= 'z');
    }

    /**
     * Records one occurrence of a word, case-insensitively.
     *
     * @param str the word; {@code null} or empty input is ignored
     * @param map word-to-count map that is updated in place; keys are lower-cased
     */
    public static void Addword(String str, Map<String, Integer> map) {
        if (str == null || str.length() == 0) {
            return;
        }
        String key = str.toLowerCase(Locale.ROOT);
        Integer count = map.get(key);
        map.put(key, count == null ? 1 : count + 1);
    }

    /**
     * Prints up to ten entries of {@code map} as {@code "word : count"},
     * highest count first. The sort is stable, so entries with equal counts
     * keep the iteration order of {@code map}.
     *
     * @param map word-to-count map; not modified
     */
    public static void printResult(Map<String, Integer> map) {
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                // Comparing o2 against o1 (rather than o1 against o2) yields descending order.
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        // Never index past the end when there are fewer than TOP_N distinct words.
        int limit = Math.min(TOP_N, list.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = list.get(i);
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
    }
}
輸出結果:
{a=4, all=1, along=1, always=2, and=6, appreciate=2, are=2, around=2, bad=1, based=1, be=1, begins=1, best=1, brighten=1, brighter=1, brightest=1, can=1, chance=1, comes=1, crying=1, day=1, do=1, dream=1, dreams=1, ends=1, enough=1, everyone=2, everything=1, failures=1, feel=1, for=2, forgotten=1, from=1, future=1, go=2, happen=1, happiest=1, happiness=1, have=7, hope=1, hug=1, hurts=2, if=1, importance=1, in=4, is=2, it=1, just=3, keep=1, kiss=1, know=1, let=2, lies=1, life=4, lifeuntil=1, make=6, mean=1, message=1, miss=2, moments=1, most=1, much=1, necessarily=1, need=1, of=6, on=3, one=4, only=2, opportunity=1, or=1, other=1, out=1, past=1, people=3, pick=1, probably=1, put=1, really=2, see=1, send=1, side=1, smile=1, smiling=1, so=2, someone=1, something=1, sorrow=1, that=6, the=8, their=3, them=3, there=1, they=1, things=2, this=2, those=8, to=15, touched=2, trials=1, want=6, was=1, way=1, well=1, were=2, what=2, when=4, where=1, who=9, will=3, with=4, you=26, your=4, yourself=1} you : 26 to : 15 who : 9 the : 8 those : 8 have : 7 and : 6 make : 6 of : 6 that : 6 {need=1, sorrow=1, =37, they=2, person=1, don=3, for=3, everything=2, chance=1, forgotten=1, people=3, of=6, are=2, on=3, only=2, something=1, yourself=1, along=1, happen=1, everyone=2, smile=2, ends=1, others=1, or=1, happiness=2, bad=1, always=2, them=3, will=3, go=4, born=1, put=1, lies=1, most=1, grows=1, life=4, side=1, happy=1, probably=1, best=1, do=2, pick=1, happiest=1, want=6, just=3, failures=1, may=1, let=2, a=4, crying=2, really=2, t=4, s=1, what=2, nothing=1, down=1, based=1, keep=1, heartaches=1, to=19, send=1, dreams=1, hug=1, enough=4, where=1, who=10, necessarily=1, love=1, kiss=1, were=2, please=1, future=1, tear=1, someone=2, when=5, friendship=1, brighten=1, brighter=1, smiling=2, trials=1, live=1, be=3, shoes=1, dream=2, so=2, begins=1, lifeuntil=1, moments=1, comes=1, much=1, and=8, that=6, sweet=1, importance=1, strong=1, lives=1, day=1, this=2, make=6, hope=1, appreciate=2, 
other=1, can=2, have=7, one=4, way=2, tried=1, well=1, from=1, re=1, was=1, because=1, another=1, real=1, if=2, touched=2, die=1, mean=1, all=1, too=1, hurts=2, feel=1, is=2, those=9, with=4, it=3, your=4, you=32, the=10, past=2, know=1, in=4, cry=1, around=2, human=1, message=2, opportunity=1, hurt=1, see=1, brightest=1, there=1, things=2, their=3, worry=1, searched=1, miss=2, out=1} : 37 you : 32 to : 19 who : 10 the : 10 those : 9 and : 8 have : 7 of : 6 want : 6