#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
#
# Copyright (c)
# Laiseek Company 2012
# All rights reserved.
#
# Filename: nearsyn.py
# Function: preprocessing tool
#
# Current version: 1.0
# author: Chen Yu
# Date: 05/07/2012
#


class NearSyn:
    """Filter rows of space-separated words against a per-row removal list.

    Attributes:
        syn: list of rows, each row a list of words (filled by ``load``).
        det: list of removal entries, one per row.  Entries are raw lines
            after ``load`` and become lists of words once ``dete`` has
            processed them.
    """

    def __init__(self):
        """Create an instance with no rows and no removal entries."""
        self.syn = []
        self.det = []

    # Kept so that existing callers of the old explicit ``init()`` still work.
    init = __init__

    # Load the data.
    def load(self, filename, detfile):
        """Read the word rows and the removal lines from two text files.

        Args:
            filename: path of the file whose lines are split on single
                spaces into ``self.syn``.
            detfile: path of the file whose raw lines are stored in
                ``self.det``.

        Raises:
            IOError/OSError: if either file cannot be opened or read.
        """
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(filename, 'r') as source:
            self.syn = [line.split(' ') for line in source.read().split('\n')]
        with open(detfile, 'r') as source:
            self.det = source.read().split('\n')

    # Remove the empty entries produced while reading.
    def empty(self):
        """Drop empty strings from every row of ``self.syn`` in place."""
        for row in self.syn:
            # Slice assignment keeps the same list object, as the original
            # remove()-based loop did, while filtering in a single pass.
            row[:] = [word for word in row if word]

    # Remove D from A.
    def dete(self):
        """Remove each row's removal words from the matching ``self.syn`` row.

        Row ``i`` of ``self.det`` is split on single spaces (and stored back
        in split form); every non-empty word in it has one occurrence removed
        from row ``i`` of ``self.syn`` if it is present.  Rows without a
        counterpart in the other list are left untouched, and words that are
        not present are skipped rather than raising ``ValueError``.
        """
        for index, (row, unwanted) in enumerate(zip(self.syn, self.det)):
            # A previous call may already have split this entry.
            if not isinstance(unwanted, list):
                unwanted = unwanted.split(' ')
                self.det[index] = unwanted
            for word in unwanted:
                if word and word in row:
                    row.remove(word)

    # Write the result.
    def output(self, filename='nearsyn'):
        """Write every row that holds more than one word to ``filename``.

        Each word is followed by a single space and each row by a newline.

        Args:
            filename: path of the file to (over)write; defaults to
                ``'nearsyn'`` in the current directory.

        Raises:
            IOError/OSError: if the file cannot be opened or written.
        """
        with open(filename, 'w') as sink:
            for row in self.syn:
                # Rows with a single word (or none) are dropped.
                if len(row) > 1:
                    sink.write(' '.join(row) + ' \n')
62
63
if __name__ == '__main__':
    # Script entry point: read files 'A' and 'D' from the working directory,
    # strip empty entries, remove the D words from the A rows, then write
    # the surviving rows to the default output file.
    near_syn = NearSyn()
    near_syn.load('A', 'D')
    near_syn.empty()
    near_syn.dete()
    near_syn.output()