KWIC索引系統接受一些行,每行有若干字,每一個字由若干字符組成;每行均可以循環移位:重複地把第一個字刪除,然後接到行末。KWIC把全部行的各種移位結果按照字母表順序輸出。
在網上找了一個基於管道過濾器的實現,但發現好像有錯誤,修改了一下使之正確,以下是代碼:
Filter類:
package com.jason.kwic;

import java.io.IOException;

/**
 * Base class for one stage of the pipe-and-filter pipeline.
 *
 * <p>A filter holds an input pipe and an output pipe and does its work in
 * {@link #transform()}. It is a {@link Runnable}, so it can either be started
 * on its own thread through {@link #start()} or have {@code transform()}
 * invoked directly by code in the same package.
 */
public abstract class Filter implements Runnable {
    /** Pipe this filter reads from; subclasses without an upstream stage pass {@code null}. */
    protected Pipe input;
    /** Pipe this filter writes to; subclasses without a downstream stage pass {@code null}. */
    protected Pipe output;
    /** Set once {@link #start()} has launched the worker thread. */
    private boolean isStart = false;

    /**
     * @param input  pipe to read from
     * @param output pipe to write to
     */
    Filter(Pipe input, Pipe output) {
        this.input = input;
        this.output = output;
    }

    /**
     * Launches this filter on a new thread. Only the first call has any effect;
     * later calls are ignored. The guard is a plain field, so this method is
     * meant to be called from a single controlling thread.
     */
    public void start() {
        if (!isStart) {
            isStart = true;
            Thread thread = new Thread(this);
            thread.start();
        }
    }

    /**
     * Thread entry point: runs {@link #transform()} and reports any I/O failure.
     */
    @Override
    public void run() {
        try {
            this.transform();
        } catch (IOException e) {
            // Previously the exception was discarded (a bare e.getMessage() call),
            // so a failed stage left no trace. Report it the same way Main does.
            e.printStackTrace();
        }
    }

    /**
     * Reads from the input pipe, converts the data, and writes the result to
     * the output pipe. Implemented by each concrete filter.
     *
     * @throws IOException if reading or writing fails
     */
    protected abstract void transform() throws IOException;
}
Pipe類:
package com.jason.kwic;

import java.io.IOException;
import java.io.PipedReader;
import java.io.PipedWriter;
import java.io.PrintWriter;
import java.util.Scanner;

/**
 * A line-oriented, one-directional channel between two filters.
 *
 * <p>Internally a {@link PipedWriter} is connected to a {@link PipedReader};
 * lines are written through a {@link PrintWriter} and read back through a
 * {@link Scanner}. The piped reader has a bounded internal buffer, so a writer
 * blocks once the buffer is full until a reader drains it. Anything beyond a
 * small amount of data must therefore be written and read on different threads.
 */
public class Pipe {
    /** Read end of the pipe. */
    private final Scanner pipereader;
    /** Write end of the pipe. */
    private final PrintWriter pipewriter;

    /**
     * Creates a pipe whose write end is connected to its read end.
     *
     * @throws IllegalStateException if the two ends cannot be connected
     */
    public Pipe() {
        PipedWriter pw = new PipedWriter();
        PipedReader pr = new PipedReader();
        try {
            pw.connect(pr);
        } catch (IOException e) {
            // Previously ignored, which would have produced a pipe that silently
            // loses every line. Fail fast and keep the cause instead.
            throw new IllegalStateException("Unable to connect pipe ends", e);
        }
        pipewriter = new PrintWriter(pw);
        pipereader = new Scanner(pr);
    }

    /**
     * Reads the next line from the pipe, waiting for data if necessary.
     *
     * @return the next line without its line terminator, or {@code null} when
     *         no further line is available
     * @throws IOException declared for callers; the underlying {@link Scanner}
     *         does not itself throw it
     */
    public String readerLine() throws IOException {
        if (pipereader.hasNextLine()) {
            return pipereader.nextLine();
        }
        return null;
    }

    /**
     * Writes one line, followed by a line terminator, into the pipe.
     *
     * @param strline the line to write
     * @throws IOException declared for callers; the underlying
     *         {@link PrintWriter} records errors internally instead of throwing
     */
    public void writerLine(String strline) throws IOException {
        pipewriter.println(strline);
    }

    /**
     * Closes the read end. No more lines can be read afterwards.
     *
     * @throws IOException declared for callers
     */
    public void closeReader() throws IOException {
        pipereader.close();
    }

    /**
     * Flushes pending data and closes the write end, which signals
     * end-of-stream to the reader. No more lines can be written afterwards.
     *
     * @throws IOException declared for callers
     */
    public void closeWriter() throws IOException {
        pipewriter.flush();
        pipewriter.close();
    }
}
Input類:
package com.jason.kwic;

import java.io.File;
import java.io.IOException;
import java.util.Scanner;

/**
 * Source filter: reads a text file line by line and writes every line to the
 * output pipe.
 */
public class Input extends Filter {
    /** File the lines are read from. */
    private File infile;

    /**
     * @param file   text file to read
     * @param output pipe that receives the file's lines
     */
    Input(File file, Pipe output) {
        super(null, output);
        this.infile = file;
    }

    /**
     * Copies every line of the input file into the output pipe, then closes
     * the pipe's write end.
     *
     * @throws IOException if the file cannot be opened or a line cannot be written
     */
    @Override
    protected void transform() throws IOException {
        try {
            Scanner sc = new Scanner(infile);
            try {
                while (sc.hasNextLine()) {
                    output.writerLine(sc.nextLine());
                }
            } finally {
                // Release the file handle even if writing to the pipe fails.
                sc.close();
            }
        } finally {
            // Always close the write end, including when the file cannot be opened;
            // otherwise the downstream reader never sees end-of-stream.
            output.closeWriter();
        }
    }
}
Shift類:
package com.jason.kwic;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Circular-shift filter: for every input line, emits all rotations of its
 * words (one rotation per word), each as a separate output line.
 */
public class Shift extends Filter {

    /**
     * @param input  pipe delivering the original lines
     * @param output pipe receiving the rotated lines
     */
    Shift(Pipe input, Pipe output) {
        super(input, output);
    }

    /**
     * Reads lines until the input pipe is exhausted and writes every circular
     * shift of each line to the output pipe. Both pipe ends are closed when
     * the method returns, whether normally or by exception.
     *
     * @throws IOException if reading from or writing to a pipe fails
     */
    @Override
    protected void transform() throws IOException {
        try {
            String line;
            while ((line = input.readerLine()) != null) {
                for (String shifted : circularShifts(line)) {
                    output.writerLine(shifted);
                }
            }
        } finally {
            try {
                input.closeReader();
            } finally {
                output.closeWriter();
            }
        }
    }

    /**
     * Builds all circular shifts of one line.
     *
     * <p>Words are separated by any run of whitespace; leading and trailing
     * whitespace is ignored, so no empty words are produced. Each result joins
     * the words with exactly one space and has no trailing space. A blank line
     * yields an empty list.
     *
     * <p>Shifts are produced starting with the rotation that begins at the
     * last word and ending with the unshifted line, which is the order the
     * downstream stage has always received them in.
     *
     * @param line the original line
     * @return the rotations of {@code line}, one per word
     */
    private static List<String> circularShifts(String line) {
        List<String> shifts = new ArrayList<String>();
        String trimmed = line.trim();
        if (trimmed.length() == 0) {
            return shifts;
        }
        String[] words = trimmed.split("\\s+");
        int count = words.length;
        for (int start = count - 1; start >= 0; start--) {
            StringBuilder rotated = new StringBuilder();
            for (int offset = 0; offset < count; offset++) {
                if (offset > 0) {
                    rotated.append(' ');
                }
                // Wrap around so the words before 'start' follow the words after it.
                rotated.append(words[(start + offset) % count]);
            }
            shifts.add(rotated.toString());
        }
        return shifts;
    }
}
Alphabetizer類:
package com.jason.kwic;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 * Sorting filter: collects every line from the input pipe, sorts the lines
 * alphabetically, and writes them to the output pipe.
 */
public class Alphabetizer extends Filter {

    /**
     * @param input  pipe delivering the unsorted lines
     * @param output pipe receiving the sorted lines
     */
    Alphabetizer(Pipe input, Pipe output) {
        super(input, output);
    }

    /**
     * Reads all lines, sorts them, and writes them out in sorted order. Both
     * pipe ends are closed when the method returns, whether normally or by
     * exception.
     *
     * @throws IOException if reading from or writing to a pipe fails
     */
    @Override
    protected void transform() throws IOException {
        try {
            // Sorting needs the complete input, so buffer every line first.
            List<String> lines = new ArrayList<String>();
            String line;
            while ((line = input.readerLine()) != null) {
                lines.add(line);
            }

            Collections.sort(lines, new AlphabetizerComparator());

            for (String sorted : lines) {
                output.writerLine(sorted);
            }
        } finally {
            try {
                input.closeReader();
            } finally {
                output.closeWriter();
            }
        }
    }

    /**
     * Orders lines alphabetically without regard to letter case.
     *
     * <p>The whole line is compared, not just its first character, so lines
     * sharing a first letter are ordered among themselves too. Lines that
     * start with a non-letter, and empty lines, are ordered by character value
     * instead of being rejected. Lines that differ only in case are ordered by
     * their natural (case-sensitive) order so the result is deterministic.
     */
    private static class AlphabetizerComparator implements Comparator<String> {

        /**
         * @throws NullPointerException if either argument is {@code null}
         */
        @Override
        public int compare(String o1, String o2) {
            if (o1 == null || o2 == null) {
                throw new NullPointerException();
            }
            int ignoringCase = String.CASE_INSENSITIVE_ORDER.compare(o1, o2);
            if (ignoringCase != 0) {
                return ignoringCase;
            }
            return o1.compareTo(o2);
        }
    }
}
Output類:
package com.jason.kwic;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;

/**
 * Sink filter: writes every line received from the input pipe to a file.
 */
public class Output extends Filter {
    /** File the lines are written to. */
    private File file;

    /**
     * @param input pipe delivering the lines to write
     * @param file  destination file; created or overwritten when the filter runs
     */
    Output(Pipe input, File file) {
        super(input, null);
        this.file = file;
    }

    /**
     * Drains the input pipe into the destination file, one line per
     * {@code "\n"}-terminated row, then closes the file and the pipe's read end.
     *
     * @throws IOException if the file cannot be opened or written
     */
    @Override
    protected void transform() throws IOException {
        try {
            PrintWriter pw = new PrintWriter(file);
            try {
                String line;
                while ((line = input.readerLine()) != null) {
                    pw.write(line);
                    pw.write("\n");
                }
                // PrintWriter records write failures instead of throwing them;
                // checkError() flushes and exposes that state.
                if (pw.checkError()) {
                    throw new IOException("Failed to write " + file);
                }
            } finally {
                pw.close();
            }
        } finally {
            input.closeReader();
        }
    }
}
Main主程序:
package com.jason.kwic;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Scanner;

/**
 * Entry point: wires Input -> Shift -> Alphabetizer -> Output together with
 * three pipes, runs the pipeline, and then echoes the input and output files
 * to standard output.
 */
public class Main {
    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INFILE = "d:\\temp\\mykwic_in.txt";
    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTFILE = "d:\\temp\\mykwic_out.txt";

    /**
     * Runs the KWIC pipeline.
     *
     * @param args optional: {@code args[0]} is the input file path and
     *             {@code args[1]} the output file path; the built-in default
     *             paths are used for any argument that is absent
     */
    public static void main(String[] args) {
        File infile = new File(args.length > 0 ? args[0] : DEFAULT_INFILE);
        File outfile = new File(args.length > 1 ? args[1] : DEFAULT_OUTFILE);

        try {
            if (!infile.isFile()) {
                throw new FileNotFoundException("Input file not found: " + infile);
            }

            // Three pipes connect the four filters.
            Pipe pipe1 = new Pipe();
            Pipe pipe2 = new Pipe();
            Pipe pipe3 = new Pipe();

            Input input = new Input(infile, pipe1);
            Shift shift = new Shift(pipe1, pipe2);
            Alphabetizer alph = new Alphabetizer(pipe2, pipe3);
            Output output = new Output(pipe3, outfile);

            // Each filter needs its own thread: a pipe has a bounded buffer, so a
            // writer running to completion before its reader starts would block
            // forever as soon as the buffer fills.
            Thread[] stages = {
                stage(input, null, pipe1),
                stage(shift, pipe1, pipe2),
                stage(alph, pipe2, pipe3),
                stage(output, pipe3, null)
            };
            for (Thread stage : stages) {
                stage.start();
            }
            // Wait for the whole pipeline instead of sleeping for a fixed time.
            for (Thread stage : stages) {
                stage.join();
            }

            System.out.println("----- infile -----");
            printLines(infile);
            System.out.println("input end");

            System.out.println("----- outfile -----");
            printLines(outfile);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Wraps a filter in a thread that runs it and then closes the pipe ends the
     * filter owns, so a stage that fails cannot leave its neighbours waiting.
     *
     * @param filter the filter to run
     * @param in     pipe the filter reads from, or {@code null}
     * @param out    pipe the filter writes to, or {@code null}
     * @return an unstarted thread for the stage
     */
    private static Thread stage(final Filter filter, final Pipe in, final Pipe out) {
        return new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    filter.transform();
                } catch (Exception e) {
                    e.printStackTrace();
                } finally {
                    closeEnds(in, out);
                }
            }
        });
    }

    /** Closes the read end of {@code in} and the write end of {@code out}, skipping nulls. */
    private static void closeEnds(Pipe in, Pipe out) {
        try {
            if (in != null) {
                in.closeReader();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            if (out != null) {
                out.closeWriter();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints every line of a file to standard output.
     *
     * @param file the file to print
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static void printLines(File file) throws FileNotFoundException {
        Scanner sc = new Scanner(file);
        try {
            while (sc.hasNextLine()) {
                System.out.println(sc.nextLine());
            }
        } finally {
            sc.close();
        }
    }
}
注意:其中的infile和outfile對應的路徑要修改爲實際的路徑,而且必須存在這兩個文件。