php 敏感詞過濾

最近的任務是敏感詞過濾(檢測文章中的敏感詞、手機號及網址並高亮)。首先是百度搜索並查看項目代碼,於是有了第一個版本。

版本一:

1. worklevel.php

//根據文章id獲取敏感詞庫和文章內容

/**
 * Return an article's content with sensitive words, phone numbers and URLs highlighted.
 *
 * Loads every SensitiveWord row whose status is SensitiveWord::STATUS_1 and the
 * content row for the given article, then delegates the markup work to
 * SensitiveWord::setSpecialColor().
 *
 * @param int|string $id Value matched against ArticleContent.article_id.
 * @return string The highlighted content.
 * @throws \yii\web\NotFoundHttpException When the article has no content row.
 */
public function actionSpecial($id)
{
    $article = ArticleContent::find()
        ->where(['article_id' => $id])
        ->asArray()
        ->one();

    // Previously index 0 of the result list was read unconditionally, which
    // failed with an "undefined offset" error for an unknown article id.
    if ($article === null) {
        throw new \yii\web\NotFoundHttpException('Article content not found.');
    }

    $sensitiveWords = SensitiveWord::find()
        ->where(['status' => SensitiveWord::STATUS_1])
        ->all();

    return SensitiveWord::setSpecialColor($sensitiveWords, $article['content']);
}

2. sensitiveword.php

/**
 * Wrap sensitive words, mobile numbers and URLs found in $text in highlight <span> tags.
 *
 * Every match is wrapped exactly once, regardless of how many sensitive words
 * are supplied or how often the same number/URL repeats in the text.
 *
 * @param array|\Traversable|string $SensitiveWord Objects exposing a `name` property holding
 *                                                 the word to flag; any empty value skips
 *                                                 word highlighting.
 * @param string                    $text          Content to scan.
 * @return string The content with highlight markup inserted.
 */
public static function setSpecialColor($SensitiveWord = '', $text = '')
{
    $text = (string) $text;

    if (!empty($SensitiveWord)) {
        $wordMarkup = [];
        foreach ($SensitiveWord as $v) {
            $word = (string) $v->name;
            // An empty search string can never match and is not a valid strtr() key.
            if ($word === '') {
                continue;
            }
            $wordMarkup[$word] = '<span class="setjuhuospecialcolor">' . $word . '</span>';
        }

        // strtr() with a map scans the text once, prefers the longest key and never
        // re-scans inserted markup, so a later word cannot corrupt an earlier <span>.
        if (!empty($wordMarkup)) {
            $text = strtr($text, $wordMarkup);
        }
    }

    // Phone numbers first, then URLs - the same order as before, but executed
    // once per call instead of once per sensitive word.
    $patterns = [
        '/1\d{10}?/',
        '/(http|https):\/\/([\w\d\-_]+[\.\w\d\-_]+)[:\d+]?([\/]?[\w\/\.]+)/i',
    ];
    $highlighted = preg_replace(
        $patterns,
        '<span class="setjuhuospeciaphonelcolor">$0</span>',
        $text
    );

    // preg_replace() returns null on a PCRE failure; keep the text produced so far in that case.
    return $highlighted === null ? $text : $highlighted;
}

其實這個版本還是能用的,可是真正到線上運行時,打開文章預覽會出現超時的問題;並且考慮到後期如果詞庫太大,效率方面不是很好。

因此打算改成用異步的方式;考慮到同時使用人數多的狀況,順便把整個過濾功能放到 JS 中去處理,而不是讓服務器去處理,以提升加載速度。

版本二:

1. view.php

<style type="text/css">
    /* Layout rules for the preview page; the elements using these classes are not part of this snippet. */
    .title{   text-align: center; font-size: 25px; font-weight: bold;}
    .time{    text-align: center; margin: 25px;}
    .content{ width: 92%; margin-left: 4%;}
    .remark{  margin-top: 50px; border: 1px solid #000000; width: 92%; margin-left: 4%; padding: 5px; border-radius: 5px;}
    /* Highlight colour for flagged text: the server-side filter wraps sensitive words in this class. */
    .setjuhuospecialcolor{color: #ffcc00}
    /* Same colour, used for phone numbers and URLs (and for every highlight produced by the page script). */
    .setjuhuospeciaphonelcolor{color: #ffcc00}
</style>


<script src="/js/admin/jquery-1.10.2.min.js"></script>
<script language="JavaScript">
    
    //獲取文章
    $.get('/admin/work-level/get-article?id=<?php echo $_GET['id'];?>',function (article_content) {
        //先顯示內容給用戶看
        $("#content").html(article_content);
        
        //去除敏感詞後的內容展現給用戶(可能消耗大量時間)
        $.get('/admin/work-level/get-sensitive-word',function (data) { //由於JS只能接受字符串,因此先把敏感詞拼成字符串,再進行拆分,變成數組
            var getstrimg        =  data;
            var sensitiveword    =  new Array(getstrimg .length);
            sensitiveword        =  getstrimg .split('-'); //循環查找關鍵字進行替換
            $.each(sensitiveword,function(key,value){
                var reg_keyword  =  eval('/'+value+'/g');
                article_content  =  article_content.replace(reg_keyword, '<span class="setjuhuospeciaphonelcolor">'+value+'</span>');
            });

            //替換手機號
            var reg_phone_arr    =  /1\d{10}?/g;
            var phone            =  article_content.match(reg_phone_arr);
            var phone_arr        =  delete_repetition(phone);
            for(var i=0;i<phone_arr.length;i++){
                var reg_phone    =  eval('/'+phone_arr[i]+'/g');
                article_content  =  article_content.replace(reg_phone, '<span class="setjuhuospeciaphonelcolor">'+phone_arr[i]+'</span>');
            }

            //替換網址
            var reg_url_arr      =  /(http|https):\/\/([\w\d\-_]+[\.\w\d\-_]+)[:\d+]?([\/]?[\w\/\.]+)?/g;
            var url              =  article_content.match(reg_url_arr);
            var url_arr          =  delete_repetition(url);
            for(var t=0;t<url_arr.length;t++){
                var reg_url      =  eval("url_arr[t]");
                article_content  =  article_content.replace(reg_url, '<span class="setjuhuospeciaphonelcolor">'+url_arr[t]+'</span>');
            }
            $("#content").html(article_content);
        });
    });

    //去重函數
    function delete_repetition(arr){
        var tmp = new Array();
        for(var m in arr){
            tmp[arr[m]]=1;
        }

        //再把鍵和值的位置再次調換
        var tmparr = new Array();
        for(var n in tmp){
            tmparr.push(n);
        }
        return tmparr;
    }
</script>


2. WorkLevelController.php

<?php
namespace app\controllers\admin;

use app\components\common\Tools;
use app\models\WorkLevel;
use app\models\SensitiveWord;
use app\models\ArticleContent;
use Yii;


/**
 * Endpoints that serve the raw article content and the sensitive-word list,
 * intended for a page script that performs the highlighting on the client.
 */
class WorkLevelController extends AdminController
{
    /**
     * Return the content of one article.
     *
     * @param int|string $id Value matched against ArticleContent.article_id.
     * @return string The stored content.
     * @throws \yii\web\NotFoundHttpException When the article has no content row.
     */
    public function actionGetArticle($id)
    {
        $article = ArticleContent::find()
            ->where(['article_id' => $id])
            ->asArray()
            ->one();

        // Reading index 0 of an empty result list used to fail with an
        // "undefined offset" error for an unknown article id.
        if ($article === null) {
            throw new \yii\web\NotFoundHttpException('Article content not found.');
        }

        return $article['content'];
    }

    /**
     * Return the names of all sensitive words with status 1, joined by '-'.
     *
     * An empty string is returned when no word matches; previously the name
     * list was left undefined in that case and implode() failed.
     *
     * NOTE(review): a word containing '-' cannot be told apart from two words
     * by the consumer of this format - confirm no such words exist.
     *
     * @return string The '-'-separated word list.
     */
    public function actionGetSensitiveWord()
    {
        $sql  = 'select name FROM sensitive_word where status = 1';
        $rows = SensitiveWord::findBySql($sql)->asArray()->all();

        return implode('-', array_column($rows, 'name'));
    }
}

最終效果,把需要過濾的內容顯示成金色:

當然,以上的方法是基於目前不是很多的敏感詞庫;如果敏感詞庫特別大的話,建議參考:

1. PHP 實現敏感詞過濾系統

2.基於AC狀態機的關鍵詞過濾

相關文章
相關標籤/搜索