phpcms全文檢索功能實現(集成sphinx)

sphinx配置

sphinx是俄羅斯人開發的一個搜索引擎,基於c++編寫,具備強大的檢索能力,本身支持中文單個字符的檢索,中文分詞需要額外的插件Coreseek,但該插件已很久未更新。github用戶eric1688基於sphinx 2.2.9版本改寫了能夠較好支持中文分詞的sphinx for chinese,本文配置均基於此版本,下載地址:https://github.com/eric1688/sphinx ,向作者致敬。
另:如果是別的系統,建議使用elastic search開發,對中文分詞的支持更完善一些,社區更爲活躍;但需求不複雜的話,sphinx已足夠,而且配置相對更方便些。

安裝sphinx

本文基於ubuntu 18.04安裝sphinx,從上述地址下載壓縮包,以下示例大部分基於作者的說明

下載解壓

$ git clone https://github.com/eric1688/sphinx 
$ cd sphinx

編譯(假設安裝到/usr/local/sphinx目錄,下同)

安裝gcc 和 mysql

sudo apt update
sudo apt install build-essential
sudo apt install mysql-server

編譯安裝sphinx

# 常用選項:--prefix 指定安裝路徑;--with-mysql 編譯mysql支持;--with-pgsql 編譯pgsql支持
# 本文的數據源類型爲mysql,因此需要mysql支持
$ ./configure --prefix=/usr/local/sphinx --with-mysql
$ make 
$ make install

安裝完成後,sphinx目錄下應有bin、etc、share三個子目錄,新建data和log兩個子目錄

配置中文支持

$ tar -xvf xdict_1.1.tar.gz
$ /usr/local/sphinx/bin/mkdict xdict_1.1.txt xdict

從xdict_1.1.txt生成xdict文件,xdict_1.1.txt文件可以根據需要進行修改

$ cp xdict /usr/local/sphinx/etc/

配置sphinx.conf

sphinx.conf是最關鍵的一部分,sphinx的運作主要基於該文件。以下是本人的配置文件,結合phpcms數據庫進行配置,該文件存放於bin子目錄中

# Main index data source: pulls rows from the phpcms v9_search table.
source main
{
	type               = mysql

	# Connection to the MySQL server that hosts the phpcms database.
	sql_host           = 192.168.91.1
	sql_port           = 3306
	sql_user           = sphinx
	sql_pass           = sphinx
	sql_db             = phpcms_v9

	# Pre-queries run in the order listed, before the main fetch:
	#   1. switch the connection to UTF-8;
	#   2. store the current MAX(searchid) in counter row 1 of v9_sphinx_counter.
	sql_query_pre      = SET NAMES utf8
	sql_query_pre      = REPLACE INTO v9_sphinx_counter SELECT 1, MAX(searchid) FROM v9_search

	# Ranged fetch: ids from 1 up to the stored max_doc_id, walked in steps of
	# 5000, with $start/$end substituted for each step.
	sql_query_range    = SELECT 1,max_doc_id FROM v9_sphinx_counter WHERE counter_id=1
	sql_range_step     = 5000
	sql_query          = SELECT searchid, adddate, siteid, typeid, id as news_id, data FROM v9_search WHERE searchid>=$start AND searchid<=$end

	# Columns of sql_query stored as attributes (filterable / sortable).
	# "id" is aliased to news_id in sql_query and declared under that name.
	sql_attr_uint      = typeid
	sql_attr_uint      = siteid
	sql_attr_uint      = news_id
	sql_attr_timestamp = adddate

	# Per-document lookup query.
	# NOTE(review): sql_query_info is reported as deprecated in Sphinx 2.2.x - confirm.
	sql_query_info     = SELECT * FROM v9_search WHERE searchid=$id
}

# Delta index data source: only the rows added to v9_search since the last
# build of the "main" source.
#
# "main" records MAX(searchid) in v9_sphinx_counter (counter_id=1) each time it
# is indexed. This source reads that watermark and fetches everything above
# it, so re-indexing "delta" alone picks up new rows without rebuilding "main".
source delta
{
	type = mysql
	sql_host = 192.168.91.1 # database host used by phpcms
	sql_user = sphinx # user name
	sql_pass = sphinx # password
	sql_db = phpcms_v9 # phpcms database name
	sql_port = 3306 # database port

	# Only set the connection charset here. The delta source must NOT run
	# "REPLACE INTO v9_sphinx_counter ...": doing so would advance the
	# watermark that belongs to "main" and make this source select nothing new
	# (or hide rows that "main" has not indexed yet).
	sql_query_pre = SET NAMES utf8

	# Plain (non-ranged) query: everything above the watermark. No
	# sql_query_range is used, so there is no min/max range to go wrong when
	# no new rows exist.
	sql_query = SELECT searchid, adddate, siteid, typeid, id as news_id, data FROM v9_search WHERE searchid > (SELECT max_doc_id FROM v9_sphinx_counter WHERE counter_id=1)

	# Same attribute set as "main" so both indexes can be searched together.
	sql_attr_uint  = typeid
	sql_attr_uint  = siteid
	sql_attr_uint  = news_id
	sql_attr_timestamp  = adddate
	sql_query_info  = SELECT * FROM v9_search WHERE searchid=$id
}

# Main index: built from source "main", files stored under data/main.
index main
{
	path               = /usr/local/sphinx/data/main
	source             = main
	# UTF-8 input, segmented with the dictionary generated by mkdict.
	chinese_dictionary = /usr/local/sphinx/etc/xdict
	charset_type       = utf-8
}

# Delta index: built from source "delta", files stored under data/delta.
index delta
{
	path               = /usr/local/sphinx/data/delta
	source             = delta
	# Same charset and dictionary as the main index.
	chinese_dictionary = /usr/local/sphinx/etc/xdict
	charset_type       = utf-8
}

# Settings for the indexer tool.
indexer
{
	# Memory limit for an indexing run.
	mem_limit = 128M
}

# Settings for the searchd daemon.
searchd
{
	# Listeners: 9312 is the port entered in the phpcms backend settings;
	# 9306 speaks the MySQL 4.1 wire protocol, so a plain mysql client can
	# connect to it for testing once everything is configured.
	listen          = 9312
	listen          = 9306:mysql41

	# Log and pid files, all kept in the log directory.
	pid_file        = /usr/local/sphinx/log/searchd.pid
	log             = /usr/local/sphinx/log/searchd.log
	query_log       = /usr/local/sphinx/log/query.log

	# Binary log location.
	binlog_path     = /usr/local/sphinx/data

	# Network / worker limits.
	read_timeout    = 5
	max_children    = 30

	# Index file handling.
	seamless_rotate = 1
	preopen_indexes = 1
	unlink_old      = 1
}

創建索引文件,並開啓服務

# 以下命令執行的當前目錄均爲/usr/local/sphinx/bin(indexer、searchd及上文的sphinx.conf均位於該目錄)
$ ./indexer --all
$ ./searchd

測試

# 假設sphinx所在服務器ip爲192.168.91.130
$ mysql -h192.168.91.130 -P9306
mysql> select * from main;
+------+------------+--------+--------+---------+
| id   | adddate    | siteid | typeid | news_id |
+------+------------+--------+--------+---------+
|  941 | 1585707168 |      1 |      1 |     231 |
|  942 | 1585707168 |      1 |      1 |     232 |
|  943 | 1585707168 |      1 |      1 |     233 |
|  944 | 1585707168 |      1 |      1 |     234 |
|  945 | 1585707168 |      1 |      1 |     235 |
|  946 | 1585707168 |      1 |      1 |     236 |
|  947 | 1585707168 |      1 |      1 |     237 |
|  948 | 1585707168 |      1 |      1 |     238 |
|  949 | 1585707168 |      1 |      1 |     239 |
|  950 | 1585707168 |      1 |      1 |     240 |
|  951 | 1585707168 |      1 |      1 |     241 |
|  952 | 1585707168 |      1 |      1 |     242 |
|  953 | 1585707168 |      1 |      1 |     243 |
|  954 | 1585707168 |      1 |      1 |     244 |
|  955 | 1585707168 |      1 |      1 |     245 |
|  956 | 1585707168 |      1 |      1 |     246 |
|  957 | 1585707168 |      1 |      1 |     247 |
|  958 | 1585707168 |      1 |      1 |     248 |
+------+------------+--------+--------+---------+
20 rows in set (0.00 sec)

phpcms相關配置和部分文件修改(todo)

phpcms(版本:phpcmsv9 9.6.3)集成了sphinx的接口,也有相關的文檔,但對應的sphinx版本較老,所以要正常使用,需要做一定的修改

後臺配置

模塊->全站搜索->模塊配置

基本配置:「是否啓用全站搜索」選擇「是」

sphinx全文索引配置:選擇啓用,服務器主機地址:192.168.91.130 端口號:9312

相關代碼修改

phpcms>modules>search>index.php

......
// When sphinx is enabled in the search module settings, run the query through
// the sphinx interface class.
if ($setting['sphinxenable']) {
    // NOTE(review): the return value of load_app_class() is overwritten by the
    // `new` on the next line; presumably the call is only needed to load the
    // class file - confirm against pc_base.
    $sphinx = pc_base::load_app_class('search_interface', '', 0);
    $sphinx = new search_interface();

    // Zero-based offset of the first result on the requested page.
    $offset = $pagesize * ($page - 1);
    // The sphinx build used here does not accept "id" as a sort key, so the
    // news_id attribute (an alias of id) is used instead; despite the name it
    // is not restricted to the news model.
    $res = $sphinx->search($q, array($siteid), array($typeid), array($search_time, SYS_TIME), $offset, $pagesize, '@weight desc, news_id desc');
    
    // Total hit count reported by sphinx.
    $totalnums = $res['total'];
    // Keep the matches only when there are any; otherwise $result is not
    // assigned in this block.
    if (!empty($res['matches'])) {
        $result = $res['matches'];
    }
}
.......
......
//若是結果不爲空
if (!empty($result) || !empty($commend['id'])) {
    // With sphinx enabled, the article ids are read from the match attributes.
    if ($setting['sphinxenable']) {
        // NOTE(review): the enclosing condition also passes when only
        // $commend['id'] is set; in that case $result may be empty or
        // undefined here and $sids is never initialised - confirm against the
        // full method.
        foreach ($result as $_k => $_v) {
            // The layout of $result differs slightly between sphinx versions;
            // with this build the id is exposed as the news_id attribute.
            // $sids[] = $_v['attrs']['id'];
            $sids[] = $_v['attrs']['news_id'];
        }
        // Comma-separated ids in the order sphinx returned them.
        $sort_str = implode(",", $sids);
    } 
......
......
/**
* If the table name is empty, fall back to the yellow-pages (yp) content model.
*/
if (empty($this->content_db->model_tablename)) {
$this->content_db = pc_base::load_model('yp_content_model');
$this->content_db->set_model($modelid);
}

if ($setting['sphinxenable']) {
// The result page was not ordered by the weight sphinx computed; order the
// rows with MySQL FIELD() so they follow the id sequence in $sort_str.
// NOTE(review): an empty $sort_str would yield "field(id,)" - confirm that the
// caller guarantees at least one id.
// $data = $this->content_db->listinfo($where, "id desc", 1, $pagesize);
$data = $this->content_db->listinfo($where, "field(id," . $sort_str . ")", 1, $pagesize);
// Pagination is built from the total hit count, current page and page size.
$pages = pages($totalnums, $page, $pagesize);
} else {
// Without sphinx: select via the content model, then read pagination and the
// total from $this->db.
$data = $this->content_db->select($where, '*');
$pages = $this->db->pages;
$totalnums = $this->db->number;
}
......

引入sphinx api

從下載的安裝包api目錄中找到sphinxapi.php,複製到phpcms>modules>search>classes目錄中,並更名爲sphinxapi.class.php

定時索引(todo)

配置完畢,若有疑問請留言

參考:sphinx官方文檔

相關文章
相關標籤/搜索