要求一:
英語的 26 個字母的頻率在一本小說中是如何分佈的?某類型文章中常出現的單詞是什麼?某作家最經常使用的詞彙是什麼?《飄》中最經常使用的短語是什麼,等等。
輸出某個英文文本文件中 26 個字母出現的頻率,由高到低排列,並顯示字母出現的百分比,精確到小數點後面兩位。
注:1、字母頻率 = 這個字母出現的次數 / (全部 A-Z、a-z 字母出現的總數)
2、如果兩個字母出現的頻率一樣,那麼就按照字典序排列。
源代碼如下:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

/**
 * Prints the relative frequency of each of the 26 English letters in a text file.
 *
 * <p>Upper- and lower-case occurrences are counted as the same letter. Letters are printed from
 * most to least frequent; letters with equal counts keep dictionary order. Each percentage is
 * the letter's count divided by the total number of A-Z/a-z characters, with two decimals.
 */
public class wordcount1 {
    /** Line most recently read from the input file; {@code null} once the file is exhausted. */
    static String str = "";

    /** The letters being counted, in dictionary order. */
    static String str1 = "abcdefghijklmnopqrstuvwxyz";

    /** Letters in output order; after {@link #read()} it is sorted by descending frequency. */
    static char ch1[] = str1.toCharArray();

    /**
     * Per-letter value parallel to {@link #ch1}: the occurrence count while reading, and the
     * percentage once {@link #read()} has printed its report.
     */
    public static double num[] = new double[100];

    /** Total number of A-Z/a-z characters seen in the file. */
    public static int sum = 0;

    /**
     * Reads "Harry Potter and the Sorcerer's Stone.txt" from the working directory and prints one
     * line per letter in the form {@code x:12.34%}.
     *
     * <p>If the file cannot be opened or read, the stack trace is printed and no report is
     * produced.
     */
    public static void read() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        int letters = str1.length();

        // Start from a clean state so that a second call does not add to old percentages.
        sum = 0;
        ch1 = str1.toCharArray();
        for (int k = 0; k < letters; k++) {
            num[k] = 0;
        }

        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            while ((str = br.readLine()) != null) {
                countLetters(str);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        sortByCountDescending(letters);

        for (int k = 0; k < letters; k++) {
            // A file without any letter would otherwise yield 0/0 = NaN.
            num[k] = (sum == 0) ? 0 : num[k] / sum * 100;
            System.out.print(ch1[k]);
            System.out.printf(":%.2f", num[k]);
            System.out.println("%");
        }
    }

    /** Adds every A-Z/a-z character of {@code line} to the per-letter counts and to the total. */
    private static void countLetters(String line) {
        for (int j = 0; j < line.length(); j++) {
            char ch = line.charAt(j);
            if (ch >= 'A' && ch <= 'Z') {
                ch = (char) (ch - 'A' + 'a');
            }
            if (ch >= 'a' && ch <= 'z') {
                num[ch - 'a']++;
                sum++;
            }
        }
    }

    /**
     * Sorts the first {@code letters} entries of the parallel arrays by descending count.
     *
     * <p>Insertion sort only moves an entry past strictly smaller counts, so it is stable:
     * entries with equal counts stay in their initial dictionary order.
     */
    private static void sortByCountDescending(int letters) {
        for (int i = 1; i < letters; i++) {
            char letter = ch1[i];
            double count = num[i];
            int j = i - 1;
            while (j >= 0 && num[j] < count) {
                ch1[j + 1] = ch1[j];
                num[j + 1] = num[j];
                j--;
            }
            ch1[j + 1] = letter;
            num[j + 1] = count;
        }
    }

    public static void main(String[] args) {
        read();
    }
}
運行結果
要求二:
輸出單個文件中的前 N 個最常出現的英語單詞。
作用:一個用於統計文本文件中的英語單詞出現頻率的控制檯程序;
單詞:以英文字母開頭,由英文字母和字母數字符號組成的字符串視爲一個單詞。單詞以分隔符分割且不區分大小寫。在輸出時,全部單詞都用小寫字符表示。
源代碼如下:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

/**
 * Console program that prints the N most frequent English words of a text file.
 *
 * <p>A word is a maximal run of ASCII letters and digits that starts with a letter; every other
 * character, and every line end, is a separator. Words are compared case-insensitively and
 * printed in lower case.
 */
public class wordcount2 {
    /** Line currently being tokenized; {@code null} once the input is exhausted. */
    private static String str = "";
    private static Scanner sc = new Scanner(System.in);
    /** Reader that {@link #cun()} pulls further lines from. */
    private static BufferedReader cin = null;
    /** Every word of the input, lower-cased, in reading order. */
    private static final List<String> words = new ArrayList<String>();
    /** Distinct words with their counts; ordered by {@link #Sorting()}. */
    private static final List<WordCount> ranking = new ArrayList<WordCount>();
    /** Number of words the user asked to see. */
    private static int nn = 0;

    /** A distinct word together with its number of occurrences. */
    private static final class WordCount {
        final String word;
        final int count;

        WordCount(String word, int count) {
            this.word = word;
            this.count = count;
        }
    }

    /**
     * Splits the current line and all remaining lines of the reader into words and stores them.
     *
     * @throws IOException if reading the next line fails
     */
    public static void cun() throws IOException {
        StringBuilder token = new StringBuilder();
        while (str != null) {
            for (int i = 0; i < str.length(); i++) {
                char ch = str.charAt(i);
                if (ch >= 'A' && ch <= 'Z') {
                    ch = (char) (ch - 'A' + 'a'); // fold to lower case
                }
                boolean letter = ch >= 'a' && ch <= 'z';
                boolean digit = ch >= '0' && ch <= '9';
                if (letter || digit) {
                    token.append(ch);
                } else {
                    flush(token);
                }
            }
            // A line end separates words too; otherwise the last word of one line
            // would be glued to the first word of the next.
            flush(token);
            str = (cin == null) ? null : cin.readLine();
        }
    }

    /** Stores the pending token if it starts with a letter, then empties the buffer. */
    private static void flush(StringBuilder token) {
        if (token.length() > 0 && token.charAt(0) >= 'a' && token.charAt(0) <= 'z') {
            words.add(token.toString());
        }
        token.setLength(0);
    }

    /** Prints up to {@code nn} ranked words, one per line, as word, tab, count. */
    public static void show() {
        // Never print more rows than there are distinct words.
        int limit = Math.min(nn, ranking.size());
        for (int k = 0; k < limit; k++) {
            WordCount entry = ranking.get(k);
            System.out.print(entry.word + "\t" + entry.count + " ");
            System.out.println("");
        }
    }

    /** Orders the ranking by descending count; equal counts are ordered by dictionary order. */
    public static void Sorting() {
        Collections.sort(ranking, new Comparator<WordCount>() {
            @Override
            public int compare(WordCount left, WordCount right) {
                if (left.count != right.count) {
                    return Integer.compare(right.count, left.count);
                }
                return left.word.compareTo(right.word);
            }
        });
    }

    /** Rebuilds the ranking: one entry per distinct word with its number of occurrences. */
    public static void Statistics() {
        Map<String, Integer> tally = new LinkedHashMap<String, Integer>();
        for (String word : words) {
            Integer seen = tally.get(word);
            tally.put(word, seen == null ? 1 : seen + 1);
        }
        ranking.clear();
        for (Map.Entry<String, Integer> entry : tally.entrySet()) {
            ranking.add(new WordCount(entry.getKey(), entry.getValue()));
        }
    }

    /**
     * Reads "Harry Potter and the Sorcerer's Stone.txt" (UTF-8) from the working directory and
     * collects its words. On failure a message and the stack trace are printed.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        // try-with-resources closes the stream even when reading fails.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            cin = reader;
            str = cin.readLine();
            cun();
        } catch (IOException e) {
            System.out.println("讀取失敗!");
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException {
        System.out.println("請輸入須要統計的個數:");
        nn = sc.nextInt();
        Readfile();
        Statistics();
        Sorting();
        show();
    }
}
運行結果
要求三
輸出文件中全部不重複的單詞,按照出現次數由多到少排列,出現次數一樣多的,以字典序排列。 英文字母:A-Z,a-z 字母數字符號:A-Z,a-z,0-9 分割符:空格,非字母數字符號 例:good123是一個單詞,123good不是一個單詞。good,Good和GOOD是同一個單詞
源代碼:
package classTest;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Lists every distinct word of a text file, most frequent first, on the console and in a file.
 *
 * <p>A word is a maximal run of ASCII letters and digits that starts with a letter
 * ({@code good123} is a word, {@code 123good} is not); every other character, and every line
 * end, is a separator. Words are compared case-insensitively and output in lower case. Words
 * with equal counts are listed in dictionary order.
 */
public class wordcount3 {
    /** Line currently being tokenized; {@code null} once the input is exhausted. */
    private static String str = "";
    /** Reader that {@link #cun()} pulls further lines from. */
    private static BufferedReader cin = null;
    /** Every word of the input, lower-cased, in reading order. */
    private static final List<String> words = new ArrayList<String>();
    /** Distinct words with their counts; ordered by {@link #Sorting()}. */
    private static final List<WordCount> ranking = new ArrayList<WordCount>();

    /** A distinct word together with its number of occurrences. */
    private static final class WordCount {
        final String word;
        final int count;

        WordCount(String word, int count) {
            this.word = word;
            this.count = count;
        }
    }

    /**
     * Splits the current line and all remaining lines of the reader into words and stores them.
     *
     * @throws IOException if reading the next line fails
     */
    public static void cun() throws IOException {
        StringBuilder token = new StringBuilder();
        while (str != null) {
            for (int i = 0; i < str.length(); i++) {
                char ch = str.charAt(i);
                if (ch >= 'A' && ch <= 'Z') {
                    ch = (char) (ch - 'A' + 'a'); // fold to lower case
                }
                boolean letter = ch >= 'a' && ch <= 'z';
                boolean digit = ch >= '0' && ch <= '9';
                if (letter || digit) {
                    token.append(ch);
                } else {
                    flush(token);
                }
            }
            // A line end separates words too; otherwise the last word of one line
            // would be glued to the first word of the next.
            flush(token);
            str = (cin == null) ? null : cin.readLine();
        }
    }

    /** Stores the pending token if it starts with a letter, then empties the buffer. */
    private static void flush(StringBuilder token) {
        if (token.length() > 0 && token.charAt(0) >= 'a' && token.charAt(0) <= 'z') {
            words.add(token.toString());
        }
        token.setLength(0);
    }

    /** Orders the ranking by descending count; equal counts are ordered by dictionary order. */
    public static void Sorting() {
        Collections.sort(ranking, new Comparator<WordCount>() {
            @Override
            public int compare(WordCount left, WordCount right) {
                if (left.count != right.count) {
                    return Integer.compare(right.count, left.count);
                }
                return left.word.compareTo(right.word);
            }
        });
    }

    /** Rebuilds the ranking: one entry per distinct word with its number of occurrences. */
    public static void Statistics() {
        Map<String, Integer> tally = new LinkedHashMap<String, Integer>();
        for (String word : words) {
            Integer seen = tally.get(word);
            tally.put(word, seen == null ? 1 : seen + 1);
        }
        ranking.clear();
        for (Map.Entry<String, Integer> entry : tally.entrySet()) {
            ranking.add(new WordCount(entry.getKey(), entry.getValue()));
        }
    }

    /**
     * Reads "Harry Potter and the Sorcerer's Stone.txt" (UTF-8) from the working directory and
     * collects its words. On failure a message and the stack trace are printed.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        // try-with-resources closes the stream even when reading fails.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            cin = reader;
            str = cin.readLine();
            cun();
        } catch (IOException e) {
            System.out.println("讀取失敗!");
            e.printStackTrace();
        }
    }

    /**
     * Appends the ranking to "t1.txt": rank, word, count and the word's share of all words in
     * percent, one entry per line.
     *
     * @throws IOException if the file cannot be opened or written
     */
    public static void Writefile() throws IOException {
        File file = new File("t1.txt");
        // FileWriter creates the file when it is missing; 'true' keeps earlier content.
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file, true))) {
            // The share is relative to the total number of words, not the number of distinct words.
            int total = words.size();
            for (int i = 0; i < ranking.size(); i++) {
                WordCount entry = ranking.get(i);
                out.write("這是第" + (i + 1) + "個: ");
                double f4 = (double) entry.count / total * 100;
                out.write(entry.word + "\t" + entry.count + "\t" + f4);
                out.write("\r\n");
            }
        }
    }

    /** Prints every ranked word with its count, one per line. */
    public static void show1() {
        for (WordCount entry : ranking) {
            System.out.print(entry.word + "\t \t\t" + entry.count + "\n");
        }
    }

    public static void main(String[] args) throws IOException {
        Readfile();
        Statistics();
        Sorting();
        System.out.println("程序中因此不重複的單詞!");
        show1();
        Writefile();
    }
}
由於輸出全部單詞超過緩衝區所能緩存的範圍,因而我採用文件輸出的方式:
要求四:
指定文件目錄,對目錄下每個文件執行功能1的操作
源代碼:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Lists the regular files directly inside a fixed directory.
 *
 * <p>The absolute path of every file is collected, de-duplicated and printed with its number of
 * occurrences and its share of all collected paths. The contents of the files are not read.
 */
public class wordcount4 {
    /** Entries of the directory to scan; {@code null} when the directory cannot be listed. */
    static File[] list = new File("F:\\JAVA").listFiles();
    /** Absolute paths collected by {@link #rode1(File[])}, in discovery order. */
    private static final List<String> paths = new ArrayList<String>();
    /** Distinct paths with their counts; ordered by {@link #Sorting()}. */
    private static final List<PathCount> ranking = new ArrayList<PathCount>();

    /** A distinct path together with its number of occurrences. */
    private static final class PathCount {
        final String path;
        final int count;

        PathCount(String path, int count) {
            this.path = path;
            this.count = count;
        }
    }

    /**
     * Orders the ranking by descending count. {@code Collections.sort} is stable, so paths with
     * equal counts keep their discovery order.
     */
    public static void Sorting() {
        Collections.sort(ranking, new Comparator<PathCount>() {
            @Override
            public int compare(PathCount left, PathCount right) {
                return Integer.compare(right.count, left.count);
            }
        });
    }

    /** Rebuilds the ranking: one entry per distinct path with its number of occurrences. */
    public static void Statistics() {
        Map<String, Integer> tally = new LinkedHashMap<String, Integer>();
        for (String path : paths) {
            Integer seen = tally.get(path);
            tally.put(path, seen == null ? 1 : seen + 1);
        }
        ranking.clear();
        for (Map.Entry<String, Integer> entry : tally.entrySet()) {
            ranking.add(new PathCount(entry.getKey(), entry.getValue()));
        }
    }

    /**
     * Opens "Harry Potter and the Sorcerer's Stone.txt" and closes it again without reading
     * anything. Not called by {@link #main(String[])}. On failure a message and the stack trace
     * are printed.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            // Nothing is read; the reader is only opened and closed.
        } catch (IOException e) {
            System.out.println("讀取失敗!");
            e.printStackTrace();
        }
    }

    /** Prints every ranked path with its count and its percentage of all collected paths. */
    public static void show1() {
        int total = paths.size();
        for (PathCount entry : ranking) {
            System.out.print(entry.path + "\t \t\t" + entry.count + " ");
            System.out.printf("%.2f", (double) entry.count / total * 100);
            System.out.print("%");
            System.out.println("");
        }
    }

    /**
     * Collects the absolute path of every regular file in {@code list}.
     *
     * @param list directory entries to inspect; {@code null} (what {@code File.listFiles()}
     *     returns for a missing or unreadable directory) is treated as empty
     */
    public static void rode1(File[] list) {
        if (list == null) {
            return;
        }
        for (File file : list) {
            if (file.isFile()) {
                paths.add(file.getAbsolutePath());
            }
        }
    }

    public static void main(String[] args) throws IOException {
        rode1(list);
        Statistics();
        Sorting();
        show1();
    }
}
運行結果
要求五:
指定文件目錄,但是會遞歸遍歷目錄下的所有子目錄,對每個文件執行功能1的操作。
源代碼:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Lists the regular files inside a fixed directory and, recursively, all of its subdirectories.
 *
 * <p>The absolute path of every file is collected, de-duplicated and printed with its number of
 * occurrences and its share of all collected paths. The contents of the files are not read.
 */
public class wordcount5 {
    /** Entries of the directory to scan; {@code null} when the directory cannot be listed. */
    static File[] list = new File("F:\\Java (學習用)").listFiles();
    /** Absolute paths collected by {@link #rode1(File[])}, in discovery order. */
    private static final List<String> paths = new ArrayList<String>();
    /** Distinct paths with their counts; ordered by {@link #Sorting()}. */
    private static final List<PathCount> ranking = new ArrayList<PathCount>();

    /** A distinct path together with its number of occurrences. */
    private static final class PathCount {
        final String path;
        final int count;

        PathCount(String path, int count) {
            this.path = path;
            this.count = count;
        }
    }

    /**
     * Orders the ranking by descending count. {@code Collections.sort} is stable, so paths with
     * equal counts keep their discovery order.
     */
    public static void Sorting() {
        Collections.sort(ranking, new Comparator<PathCount>() {
            @Override
            public int compare(PathCount left, PathCount right) {
                return Integer.compare(right.count, left.count);
            }
        });
    }

    /** Rebuilds the ranking: one entry per distinct path with its number of occurrences. */
    public static void Statistics() {
        Map<String, Integer> tally = new LinkedHashMap<String, Integer>();
        for (String path : paths) {
            Integer seen = tally.get(path);
            tally.put(path, seen == null ? 1 : seen + 1);
        }
        ranking.clear();
        for (Map.Entry<String, Integer> entry : tally.entrySet()) {
            ranking.add(new PathCount(entry.getKey(), entry.getValue()));
        }
    }

    /**
     * Opens "Harry Potter and the Sorcerer's Stone.txt" and closes it again without reading
     * anything. Not called by {@link #main(String[])}. On failure a message and the stack trace
     * are printed.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            // Nothing is read; the reader is only opened and closed.
        } catch (IOException e) {
            System.out.println("讀取失敗!");
            e.printStackTrace();
        }
    }

    /** Prints every ranked path with its count and its percentage of all collected paths. */
    public static void show1() {
        int total = paths.size();
        for (PathCount entry : ranking) {
            System.out.print(entry.path + "\t \t\t" + entry.count + " ");
            System.out.printf("%.2f", (double) entry.count / total * 100);
            System.out.print("%");
            System.out.println("");
        }
    }

    /**
     * Collects the absolute path of every regular file in {@code list}, descending into each
     * subdirectory.
     *
     * @param list directory entries to inspect; {@code null} (what {@code File.listFiles()}
     *     returns for a missing or unreadable directory) is treated as empty
     */
    public static void rode1(File[] list) {
        if (list == null) {
            return;
        }
        for (File file : list) {
            if (file.isFile()) {
                paths.add(file.getAbsolutePath());
            } else if (file.isDirectory()) {
                File[] children = new File(file.getAbsolutePath()).listFiles();
                rode1(children);
            }
        }
    }

    public static void main(String[] args) throws IOException {
        rode1(list);
        Statistics();
        Sorting();
        show1();
    }
}
運行結果