數據:結構化數據和非結構化數據
搜索數據
分詞:索引建立(Indexing)和搜索索引(Search)
底層數據結構
* **最後的結構**
  * **Document Frequency**:文檔頻次,總共有多少文件包含此詞
  * **Frequency**:詞頻,此文件中**此詞出現了幾次**
* 總體邏輯圖
@Test public void testDemo() throws IOException{ Analyzer analyzer = new StandardAnalyzer(); //存放在disk上 final Path path = Paths.get(targetFileIndex); Directory directory = FSDirectory.open(path); //寫入器 IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter iw = new IndexWriter(directory,iwc); Document doc = new Document(); String text = "This is the text to be indexed"; doc.add(new Field("fileName",text,TextField.TYPE_STORED)); iw.addDocument(doc); iw.close(); } @Test public void testFind() throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(); //從disk上讀取 final Path path = Paths.get(targetFileIndex); Directory directory = FSDirectory.open(path); DirectoryReader dr = DirectoryReader.open(directory); //索引查詢器 IndexSearcher isc = new IndexSearcher(dr); QueryParser parser = new QueryParser("filename",analyzer); Query query = parser.parse("text"); ScoreDoc[] hits = isc.search(query, 1000).scoreDocs;; for (int i = 0; i < hits.length; i++) { Document hitDoc = isc.doc(hits[i].doc); } dr.close(); directory.close(); }