pase.py 解析文字中包含的成語

# -*- coding: UTF-8 -*-
# tanj
# 2018-01-15
import ConfigParser

import MySQLdb
import pandas as pd

config = None


# get config
def getConfig():
    """Return the shared ConfigParser, loading ``config.ini`` on first use.

    The parser is stored in the module-level ``config`` variable. It is
    created and populated from ``config.ini`` only while that variable is
    ``None``; otherwise the existing object is returned unchanged.

    Returns:
        The module-level ``ConfigParser.ConfigParser`` instance.
    """
    global config
    if config is None:
        config = ConfigParser.ConfigParser()
        config.read("config.ini")
    # Return on every path: previously the return lived inside the ``if``,
    # so a call made while the parser was already cached returned None.
    return config


# get database connect
def get_con():
    """Open a new MySQLdb connection described by the ``localdb`` section.

    Reads ``host``, ``port``, ``user``, ``password``, ``database`` and
    ``charset`` from the parser returned by ``getConfig()``. The port is
    converted to ``int`` before connecting.

    Side effect: the module-level ``config`` is set back to ``None`` once
    the settings have been read, so the parser is not kept between calls.

    Returns:
        The connection object returned by ``MySQLdb.connect``.
    """
    global config
    config = getConfig()
    section = 'localdb'
    option_names = ('host', 'port', 'user', 'password', 'database', 'charset')
    host, port, user, passwd, db, charset = [
        config.get(section, name) for name in option_names
    ]
    # Drop the module-level parser again now that every setting is in hand.
    config = None
    return MySQLdb.connect(host=host, port=int(port), user=user,
                           passwd=passwd, db=db, charset=charset)


def query(sql, params=None):
    """Run one SQL statement on a fresh connection and return all rows.

    Args:
        sql: The SQL text to execute.
        params: Optional values passed as the second argument of
            ``cursor.execute`` so the driver can bind them to placeholders
            in ``sql``. Defaults to ``None``, which matches the previous
            behaviour of executing ``sql`` as-is.

    Returns:
        Whatever ``cursor.fetchall()`` returns, or ``None`` when executing
        or fetching raised an exception (the error is printed, not raised).
    """
    # Open a connection for this single query.
    conn = get_con()
    try:
        # Get a cursor to operate on the connection.
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        except Exception as e:
            # Interpolate the error into the message; the old code printed
            # the literal "%s" followed by the exception.
            print("mysql query error: %s" % e)
            return None
        finally:
            cursor.close()
    finally:
        # Close the connection even if creating the cursor failed.
        conn.close()


# Load every line of the word file into a set, so duplicate lines collapse.
# Lines are stored exactly as read, including their line terminator.
# The ``with`` block closes the file even if reading fails, and the handle
# no longer shadows the ``file`` builtin.
with open("workdata/word.txt") as word_file:
    words = set(word_file)

# Collect, for every comma-separated word of every line, the idioms whose
# text contains that word. An idiom appears once per matching word, so the
# number of occurrences in ``result_list`` is its match count.
result_list = []
for line in words:
    for word in line.split(','):
        # Lines still carry their line terminator, which would otherwise end
        # up inside the LIKE pattern of the last word and prevent any match.
        word = word.strip()
        if not word:
            # An empty token would produce LIKE '%%' and match every row.
            continue
        # NOTE(review): the word is concatenated straight into the SQL text;
        # a word containing a quote breaks the statement. Prefer bound
        # parameters if the word file is not fully trusted.
        select_sql = "select chengyu from idioms_dic WHERE chengyu like '%" + word + "%'"
        result = query(select_sql)
        if result is None:
            # query() returns None after a database error; skip this word
            # instead of failing while iterating over None.
            continue
        for row in result:
            result_list.append(row[0])
            # print row[0]

def test4():
    from  collections import Counter
    import operator
    #進行統計
    a = dict(Counter(result_list))
    #進行排序
    b= sorted(a.items(), key=operator.itemgetter(1),reverse=True)
    return b

if __name__ == '__main__':
    # Print only the entries that were counted exactly four times.
    for idiom, hits in test4():
        if hits == 4:
            print(idiom)



# for line in words:
#     select_sql=""
#     for word in line.split(','):
#         select_sql_chil = "select chengyu from idioms_dic WHERE chengyu like '%" + word + "%'"
#         select_sql = select_sql_chil + " union " + select_sql
#     print select_sql[:-6]
#     result = query(select_sql)
#     print result
#     # for row in result:
#     #     print row[0]
# config.ini:數據庫配置(由 getConfig() 讀取的 localdb 區段)
[localdb]
host = 127.0.0.1
port = 3306
database = test
user = root
password = 123456
charset=utf8
相關文章
相關標籤/搜索