最近負責一個項目,需要用到全文檢索,我的環境大致如下:
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#

# 數據源,src1爲名字,後面會引用這個名字
source src1
{
    type = mysql
    sql_host = localhost
    sql_user = test
    sql_pass =
    sql_db = test
    sql_port = 3306 # optional, default is 3306
    sql_query = \
        SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
        FROM documents
    sql_attr_uint = group_id
    sql_attr_timestamp = date_added
}

# test1爲索引名稱,sphinx檢索時需要這個名字,相當於關係數據庫中的table
index test1
{
    source = src1 # 引用的數據源名稱
    path = @CONFDIR@/data/test1
}

index testrt
{
    type = rt
    rt_mem_limit = 128M
    path = @CONFDIR@/data/testrt
    rt_field = title
    rt_field = content
    rt_attr_uint = gid
}

indexer
{
    mem_limit = 128M
}

searchd
{
    listen = 9312
    listen = 9306:mysql41
    log = @CONFDIR@/log/searchd.log
    query_log = @CONFDIR@/log/query.log
    read_timeout = 5
    max_children = 30
    pid_file = @CONFDIR@/log/searchd.pid
    seamless_rotate = 1
    preopen_indexes = 1
    unlink_old = 1
    workers = threads # for RT to work
    binlog_path = @CONFDIR@/data
}
D:\blue\sphinx-2.2.8-release-win64-full\bin>indexer -c ..\sphinx-min.conf.in --all
Sphinx 2.2.8-id64-release (r4942)
Copyright (c) 2001-2015, Andrew Aksyonoff
Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)
using config file '..\sphinx-min.conf.in'...
indexing index 'test1'...
collected 4 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 4 docs, 33882 bytes
total 0.121 sec, 278900 bytes/sec, 32.92 docs/sec
skipping non-plain index 'testrt'...
total 3 reads, 0.000 sec, 12.0 kb/call avg, 0.0 msec/call avg
total 12 writes, 0.001 sec, 5.7 kb/call avg, 0.1 msec/call avg

需要注意的是,如果需要創建的索引已經被使用,即已經啓動了searchd服務,就需要增加--rotate參數,類似於 indexer -c ..\sphinx-min.conf.in --all --rotate
然後在同一目錄下運行 searchd -c ..\sphinx-min.conf.in,如下:
D:\blue\sphinx-2.2.8-release-win64-full\bin>searchd -c ..\sphinx-min.conf.in
Sphinx 2.2.8-id64-release (r4942)
Copyright (c) 2001-2015, Andrew Aksyonoff
Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)
using config file '..\sphinx-min.conf.in'...
listening on all interfaces, port=9312
listening on all interfaces, port=9306
precaching index 'test1'
rotating index 'test1': success
precaching index 'testrt'
precached 2 indexes in 0.045 sec
沒有什麼錯誤,需要注意的是,需要先建立索引,才能啓動服務,否則可能會出錯。searchd命令也可以安裝爲服務,以後使用起來會更加方便,這裏這麼做也是爲了看到底是否配置成功,否則系統服務出錯,我們看不到錯誤原因。
Sphinx 2.2.8-id64-release (r4942)
Copyright (c) 2001-2015, Andrew Aksyonoff
Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)
using config file '..\sphinx-min.conf.in'...
indexing index 'test1'...
collected 4 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 4 docs, 303 bytes
total 0.086 sec, 3518 bytes/sec, 46.44 docs/sec
skipping non-plain index 'testrt'...
total 3 reads, 0.000 sec, 0.4 kb/call avg, 0.0 msec/call avg
total 12 writes, 0.001 sec, 0.2 kb/call avg, 0.0 msec/call avg
rotating indices: successfully sent SIGHUP to searchd (pid=4556).
"id" "group_id" "date_added"
"3" "2" "1427446411"
"4" "2" "1427446411"
這裏面有一個問題,可以看出id 4實際上並沒有「重構」這個詞,只是包含「重」「構」這兩個字而已,所以可能無法滿足某些需求,但是好在Sphinx的默認匹配方式是短語相似度,所以理論上來說,包含「重構」這個詞的會排序在前面,簡單測試也是如此,是否一直如此就不知道了。可以參考這篇文章:http://rainkid.blog.163.com/blog/static/165140840201010277223611/
var SphinxClient = require ("sphinxapi"),
    util = require('util'),
    assert = require('assert');

var cl = new SphinxClient();
cl.SetServer('localhost', 9312);
cl.Query('重構','test1', function(err, result) {
    assert.ifError(err);
    console.log(util.inspect(result, false, null, true));
});

運行程序,node sphinx2.js,如下

{ error: '',
  warning: '',
  status: [ 0 ],
  fields: [ 'title', 'content' ],
  attrs:
   [ [ 'group_id', 1 ],
     [ 'date_added', 2 ] ],
  matches:
   [ { id: 3,
       weight: 2,
       attrs: { group_id: 2, date_added: 1427446411 } },
     { id: 4,
       weight: 1,
       attrs: { group_id: 2, date_added: 1427446411 } } ],
  total: 2,
  total_found: 2,
  time: 0.004,
  words:
   [ { word: '重', docs: 2, hits: 2 },
     { word: '構', docs: 2, hits: 2 } ] }

可以看出和SphinxQL運行的效果一樣,只不過返回的信息更多而已。
2)SphinxQL
#sphinx.js
var mysql = require('mysql');

var connection = mysql.createConnection({
    host : 'localhost',
    port : '9306'
});

connection.connect();

var queryString = "SELECT * FROM test1 WHERE MATCH('重構')";

connection.query(queryString, function(err, rows, fields) {
    if (err) throw err;
    for (var i in rows) {
        console.log(JSON.stringify(rows[i]));
    }
});

connection.end();

運行程序,node sphinx.js,如下

{"id":3,"group_id":2,"date_added":1427446411}
{"id":4,"group_id":2,"date_added":1427446411}

乍看起來,似乎sphinxapi提供的信息更多,我沒有具體比較過,不過sphinxQL也包含了一些函數,如weight(),可以返回權重,如執行 SELECT *, weight() FROM test1 WHERE MATCH('重構'); 結果如下

"id" "group_id" "date_added" "weight()"
"3" "2" "1427446411" "2557"
"4" "2" "1427446411" "1557"

可知sphinxapi提供的權重,似乎是sphinxQL提供的值除以1000之後的值
三、CentOS的安裝和使用
$ yum install postgresql-libs unixODBC
$ rpm -Uhv sphinx-2.2.8.rhel6.x86_64.rpm
$ service searchd start
具體的使用和Windows是一樣的,沒有什麼區別。
四、其他
indexer --merge DSTINDEX SRCINDEX [--rotate]
indexer --merge main delta --merge-dst-range deleted 0 0
2013.11.09 sphinx-for-chinese-2.2.1-dev-r4311-win32.zip
2013.11.09 sphinx-for-chinese-2.2.1-dev-r4311.tar.gz
index test1
{
    source = src1
    path = D:/blue/sphinx_data/data/test1
    docinfo = extern
    charset_type = utf-8
    chinese_dictionary = D:\blue\sphinx-for-chinese-2.2.1-dev-r4311-win32\xdict
}

其中charset_type = utf-8在最新的版本中已經廢棄,因爲默認已經是utf-8,xdict是一個字典文件