Python 解析 FreeMind 思維導圖

時間 2019-12-11
原文鏈接
記錄瞬間
在實際工作中，通常需要使用思維導圖進行一些分析和設計。但是，在設計好之後，想要把思維導圖的內容轉化成文字進行輸出，該怎麼做呢？
使用 Python（當然也可以使用其他語言進行處理）可以很好地解決這個問題。
代碼如下：
# coding:utf-8
import os
import re
from html.parser import HTMLParser


# Scratch file handed to ``analyse_html``, which expects a file path.
_TEMP_HTML_PATH = "./temp.html"

# Lines containing one of these open a rich-content HTML fragment.
_RICH_CONTENT_OPENERS = (
    '<richcontent TYPE="NODE"><html>',
    '<richcontent TYPE="NOTE"><html>',
)

# Elements that are echoed to stdout unchanged.
_ECHOED_PREFIXES = (
    '<icon ', '<arrowlink ', '<hook ', '<text>',
    '</hook>', '<cloud/>', '<font ', '<edge ',
)

# Marker that precedes the value of a node's TEXT attribute.
_TEXT_MARKER = '" TEXT="'


def _print_rich_content(html_lines):
    """Parse the captured rich-content lines and print every fragment found."""
    # Mode "w" guarantees that a stale file from an earlier run is discarded.
    with open(_TEMP_HTML_PATH, mode="w", encoding="UTF-8") as temp_file:
        temp_file.writelines(html_lines)
    try:
        data = analyse_html(_TEMP_HTML_PATH)
    finally:
        # Remove the scratch file even when parsing fails.
        os.remove(_TEMP_HTML_PATH)
    print("data = ", data)
    for fragment in data:
        print(fragment.replace('\n', ''))


def _print_node_text(line, start_num):
    """Print the decoded TEXT attribute value that begins at ``start_num``."""
    print("start num = ", start_num)
    # The value ends at the next double quote, so attributes written after
    # TEXT are not mistaken for part of the text.
    end_num = line.find('"', start_num)
    if end_num == -1:
        end_num = len(line)
    print(get_chinese(line[start_num:end_num]))


def analyse_mm_file(mm_file):
    """Walk a FreeMind ``.mm`` file line by line and print what it contains.

    The file is scanned as text, one element per line, with every element
    starting at the first column. For each ``<node `` line the decoded TEXT
    attribute and the current depth path are printed. Rich content (a node
    body or note stored as HTML) is collected until its ``</richcontent>``
    line, parsed with ``analyse_html`` and printed. Icon, arrow-link, hook,
    text, cloud, font and edge lines are echoed unchanged. When ``</map>`` is
    reached, the number of lines read so far is printed.

    Args:
        mm_file: Path of the FreeMind file to read.

    Returns:
        None. All results are written to standard output.

    Raises:
        SystemExit: If ``mm_file`` is not an existing file (a message is
            printed first).
    """
    if not os.path.isfile(mm_file):
        print("系統中沒有找到FreeMind文件。{}".format(mm_file))
        # Same exception ``exit()`` raised, without relying on the site module.
        raise SystemExit

    point = 0        # nesting depth of the node currently open
    flow = ""        # depth path such as "1_2_3"
    captured = None  # HTML lines while inside rich content, otherwise None

    with open(mm_file, encoding="utf-8") as mm:
        for line_no, line in enumerate(mm, 1):
            line = line.rstrip('\n')

            if captured is not None:
                if any(opener in line for opener in _RICH_CONTENT_OPENERS):
                    captured.append("<html>\n")
                elif '</html>' in line:
                    captured.append("</html>\n")
                elif '</richcontent>' in line:
                    # The fragment is complete; child nodes that follow are
                    # handled by the regular branches below.
                    _print_rich_content(captured)
                    captured = None
                elif '</node>' in line:
                    # Fallback for a node that closes without a
                    # </richcontent> line.
                    point -= 1
                    _print_rich_content(captured)
                    captured = None
                else:
                    captured.append(line + "\n")
                continue

            if line.startswith('</map>'):
                print("解析文件完成！共解析 {} 行。".format(line_no))
            elif line.startswith('</node>'):
                point -= 1
            elif line.startswith('<node '):
                point += 1
                text_at = line.rfind(_TEXT_MARKER)
                if text_at == -1:
                    # No TEXT attribute: the node body follows as HTML.
                    captured = []
                else:
                    start_num = text_at + len(_TEXT_MARKER)
                    if line.endswith('">'):
                        _print_node_text(line, start_num)
                    elif line.endswith('/>'):
                        # Self-closing node: it opens and closes on this line.
                        point -= 1
                        _print_node_text(line, start_num)

                if not flow:
                    flow = "{}".format(point)
                else:
                    last_level = int(flow.split("_")[-1])
                    if point < last_level:
                        # Moved back up: cut the path at this depth.
                        flow = flow.split(str(point))[0] + str(point)
                    elif point > last_level:
                        flow = "{}_{}".format(flow, point)
                print("整體的線性流程：", flow)
            elif line.startswith('<richcontent TYPE="NOTE"><html>'):
                # A note attached to the current node.
                captured = ['<html>\n']
            elif line.startswith(_ECHOED_PREFIXES):
                print(line)


def analyse_html(file_path):
    """Read a UTF-8 HTML file and return the fragments ``HP`` extracts from it.

    Args:
        file_path: Path of the HTML file to parse.

    Returns:
        The ``data`` list of the ``HP`` parser after the whole file was fed
        to it.
    """
    with open(file_path, "r", encoding="UTF-8") as source:
        markup = source.read()

    extractor = HP()
    extractor.feed(markup)
    # close() forces the parser to process any buffered input.
    extractor.close()
    return extractor.data


# Matches a hexadecimal character reference (``&#x4e2d;``) or ``&amp;``.
_CHAR_REF_RE = re.compile(r"&#x([0-9a-fA-F]+);|&amp;")


def _decode_reference(match):
    """Return the character a single ``_CHAR_REF_RE`` match stands for."""
    hex_digits = match.group(1)
    if hex_digits is None:
        # The ``&amp;`` alternative matched.
        return "&"
    try:
        return chr(int(hex_digits, 16))
    except (ValueError, OverflowError):
        # Not a valid code point: keep the original text untouched.
        return match.group(0)


def get_chinese(line):
    """Decode hexadecimal character references and ``&amp;`` in ``line``.

    Every ``&#xH...;`` reference, of any length and in either letter case, is
    replaced by the character with that code point, so ``&#xa;`` becomes a
    newline. ``&amp;`` becomes ``&``. All other text, including text in front
    of the first reference, is returned unchanged. The replacement is done in
    a single pass, so the output of one substitution is never decoded again.

    Args:
        line: Text that may contain character references.

    Returns:
        The decoded text.
    """
    return _CHAR_REF_RE.sub(_decode_reference, line)


class HP(HTMLParser):
    """HTML parser that gathers ``<p>``/``<li>`` text and ``<img>`` sources.

    After feeding a document, ``data`` holds the collected fragments in the
    order the parser reported them. Image sources are stored as
    ``"img:<src>"``.
    """

    def __init__(self):
        super().__init__()
        # True while the parser is between an opening and a closing <p>/<li>.
        self.tag_text = False
        # Fragments collected so far.
        self.data = []

    def handle_starttag(self, tag, attr):
        """Start collecting text on ``<p>``/``<li>``; record ``<img>`` sources."""
        if tag in ('p', 'li'):
            self.tag_text = True
        elif tag == 'img':
            source = self._attr(attr, 'src')
            # Images with a missing or empty src are ignored.
            if source:
                self.data.append("img:{}".format(source))

    def handle_endtag(self, tag):
        """Stop collecting text when a ``<p>`` or ``<li>`` closes."""
        if tag in ('p', 'li'):
            self.tag_text = False

    def handle_data(self, data):
        """Store text found inside ``<p>``/``<li>`` after decoding it."""
        if not self.tag_text:
            return
        self.data.append(get_chinese(data))

    def _attr(self, attr_list, attr_name):
        """Return the value of the first attribute named ``attr_name``, or None."""
        return next(
            (pair[1] for pair in attr_list if pair[0] == attr_name),
            None,
        )


# Script entry: analyse the sample mind map stored under ./mm.
analyse_mm_file(mm_file="./mm/思惟導圖.mm")
================我是底線================
相關標籤/搜索
每日一句
每一个你不满意的现在，都有一个你没有努力的曾经。