統計文章中字母出現頻率:abcdsedgfcvfjghvhgb
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/** A letter together with the number of times it occurs in the text. */
class test1 {
    String zimu; // the letter
    int cishu;   // how many times the letter occurs

    public test1(String zimu, int cishu) {
        this.zimu = zimu;
        this.cishu = cishu;
    }

    public String getZimu() {
        return zimu;
    }

    public void setZimu(String zimu) {
        this.zimu = zimu;
    }

    public int getCishu() {
        return cishu;
    }

    public void setCishu(int cishu) {
        this.cishu = cishu;
    }
}

/** Counts how often each letter occurs in {@code file.txt} and prints the frequencies. */
public class ZimuCollect {

    /**
     * Reads {@code file.txt} from the working directory, counts every letter
     * case-insensitively, and prints one line per letter, most frequent first
     * (ties are listed in alphabetical order). The frequency of a letter is its
     * count divided by the total number of letters in the file.
     *
     * <p>Characters that are not letters (whitespace, digits, punctuation) are
     * ignored, so they neither get a line of their own nor dilute the frequencies.
     *
     * <p>If the file does not exist, the stack trace is printed and nothing else
     * is output.
     *
     * @throws IOException if reading the file fails for a reason other than the
     *                     file being absent
     */
    public static void collect() throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        int total = 0; // total number of letters seen

        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader("file.txt"))) {
            String line;
            while ((line = br.readLine()) != null) {
                for (char c : line.toCharArray()) {
                    if (!Character.isLetter(c)) {
                        continue; // only letters take part in the statistics
                    }
                    String key = String.valueOf(Character.toLowerCase(c));
                    counts.merge(key, 1, Integer::sum);
                    total++;
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        }

        List<test1> result = new ArrayList<>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            result.add(new test1(entry.getKey(), entry.getValue()));
        }

        // Highest count first. Integer.compare avoids the overflow pitfall of
        // subtraction; the letter tie-break makes the output deterministic.
        result.sort((e1, e2) -> {
            int byCount = Integer.compare(e2.getCishu(), e1.getCishu());
            return byCount != 0 ? byCount : e1.getZimu().compareTo(e2.getZimu());
        });

        for (test1 ee : result) {
            System.out.println("字母" + ee.getZimu() + "在文章中出現" + ee.getCishu()
                    + "次,其頻率爲" + String.format("%.2f", ee.getCishu() * 1.0 / total));
        }
    }

    public static void main(String[] args) throws IOException {
        ZimuCollect.collect();
    }
}
思路:
讀取文件,將讀取到的內容放在字符數組裏,先將其全部變爲小寫,用 map 統計每個字母的出現次數,最後遍歷輸出即可。
統計文章中單詞出現的頻率:
package Test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;

/** A word together with the number of times it occurs in the text. */
class test2 {
    String danci; // the word
    int cishu;    // how many times the word occurs

    public test2(String zimu, int cishu) {
        this.danci = zimu;
        this.cishu = cishu;
    }

    public String getDanci() {
        return danci;
    }

    public int getCishu() {
        return cishu;
    }
}

/** Counts how often words occur in {@code piao.txt} and prints the frequencies. */
public class DanciCollect {

    /** Punctuation marks that are replaced by a space before the text is split into words. */
    private static final String[] PUNCTUATION =
            {".", ",", "?", "!", ":", "‘", "’", "「", "」", "—", ";", "-"};

    /**
     * Checks a word against the lines of {@code judge.txt}.
     *
     * @param str the word to look up
     * @return {@code false} if some line of {@code judge.txt} equals {@code str},
     *         {@code true} otherwise
     * @throws IOException if {@code judge.txt} cannot be opened or read
     */
    public static boolean judgeNouse(String str) throws IOException {
        // try-with-resources closes the reader on every path, including exceptions.
        try (BufferedReader reader = new BufferedReader(new FileReader("judge.txt"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (str.equals(line)) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Lower-cases a string one {@code char} at a time with
     * {@link Character#toLowerCase(char)}.
     *
     * @param str the text to convert
     * @return a new string of the same length with every character lower-cased
     */
    public static String toLowerCase(String str) {
        char[] chars = str.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = Character.toLowerCase(chars[i]);
        }
        return new String(chars);
    }

    /**
     * Reads {@code piao.txt}, replaces punctuation with spaces, splits the text
     * on spaces, lower-cases each word, and prints the total number of counted
     * words followed by one line per word, most frequent first (ties in
     * alphabetical order).
     *
     * <p>A word is counted only when it appears as a line of {@code judge.txt},
     * i.e. when {@link #judgeNouse(String)} would return {@code false} for it.
     * NOTE(review): if {@code judge.txt} is meant to be a stop-word list, this
     * condition is inverted — confirm the intended meaning of the file.
     *
     * <p>If either file does not exist, the stack trace is printed and nothing
     * else is output.
     *
     * @throws IOException if reading a file fails for a reason other than the
     *                     file being absent
     */
    public static void collect1() throws IOException {
        try {
            String text = readText("piao.txt");
            // Load judge.txt once instead of reopening it for every single word.
            Set<String> listed = readLines("judge.txt");

            for (String mark : PUNCTUATION) {
                text = text.replace(mark, " ");
            }

            Map<String, Integer> map = new TreeMap<String, Integer>();
            int total = 0; // number of words that were counted
            for (String token : text.split(" ")) {
                if (token.isEmpty()) {
                    continue; // consecutive spaces yield empty tokens, which are not words
                }
                String word = toLowerCase(token);
                if (listed.contains(word)) {
                    total++;
                    map.merge(word, 1, Integer::sum);
                }
            }

            List<test2> result = new ArrayList<>();
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                result.add(new test2(entry.getKey(), entry.getValue()));
            }
            // Highest count first; Integer.compare avoids the overflow pitfall of
            // subtraction. The sort is stable, so ties keep the TreeMap's order.
            result.sort((e1, e2) -> Integer.compare(e2.getCishu(), e1.getCishu()));

            System.out.println("文章共計" + total + "個單詞");
            for (test2 entry : result) {
                System.out.println(entry.getDanci() + "在文章中出現" + entry.getCishu()
                        + "次,其頻率爲" + String.format("%.2f", entry.getCishu() * 1.0 / total));
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /** Reads a whole file into one string, joining the lines with a single space. */
    private static String readText(String fileName) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; without a separator the last
                // word of one line would fuse with the first word of the next.
                sb.append(line).append(' ');
            }
        }
        return sb.toString();
    }

    /** Reads all lines of a file into a set for constant-time membership checks. */
    private static Set<String> readLines(String fileName) throws IOException {
        Set<String> lines = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        return lines;
    }

    public static void main(String args[]) throws IOException {
        DanciCollect.collect1();
    }
}
思路:
讀取文件中的內容,用 append(s) 方法將每次讀取的內容追加到緩存,再將緩存內容轉成字符串;將特殊符號放到一個數組裏,然後把文本中的這些特殊符號用空格代替,用 split(" ") 方法將其分成一個個單詞存進字符串數組,然後遍歷統計單詞頻率即可。
若要去掉無用詞,在存進 map 之前用一個函數判斷:不是無用詞才存進 map 裏,然後用 map 來統計單詞頻率,轉換成 List,用 sort 函數排序,輸出頻率最高的前 n 個單詞。
這次的代碼不是我自己寫的,而是借鑑別人的。因爲按我自己的寫法,統計字母與統計單詞是完全分開的,不能用同一種方法來寫這兩個題,於是我從別人那裏學會了使用 map 來統計……