哈夫曼樹的 Python 實現

技術博客地址: github.com/yongxinz/te…html

關於哈夫曼樹的定義、構建以及哈夫曼編碼,可以參考《大話數據結構》這本書,也可以看這篇博客( www.cnblogs.com/kubixueshen… ),寫得也很清楚。

下面主要來看一下哈夫曼樹的 Python 實現:

#!/usr/bin/env python
# -*- coding: utf-8 -*-


# Count how often each character occurs and build the (char, count) table.
def count_frequency(text):
    """Return a list of ``(char, count)`` tuples for the characters of *text*.

    Each distinct character appears exactly once, in the order in which it
    first occurs in *text*. An empty *text* gives an empty list.

    The counts are gathered in a single pass with a dict, instead of a list
    membership test plus ``text.count`` for every distinct character.
    """
    first_seen = []  # distinct characters, in order of first appearance
    counts = {}

    for char in text:
        if char in counts:
            counts[char] += 1
        else:
            counts[char] = 1
            first_seen.append(char)

    return [(char, counts[char]) for char in first_seen]


# Tree node
class Node(object):
    """A node of the Huffman tree.

    Attributes:
        left, right: child nodes, ``None`` until linked.
        father: parent node, ``None`` until linked (and for the root).
        frequency: the weight passed to the constructor.
    """

    def __init__(self, frequency):
        self.left = None
        self.right = None
        self.father = None
        self.frequency = frequency

    def is_left(self):
        """Return True if this node is the left child of its father.

        A node without a father is nobody's left child, so the result is
        False rather than an AttributeError on ``None``.
        """
        father = self.father
        # Identity is the intended comparison: we ask whether the father's
        # left slot holds this very object.
        return father is not None and father.left is self


# Create the leaf nodes
def create_nodes(frequency_list):
    """Return one new ``Node`` per entry of *frequency_list*, in the same order."""
    leaves = []
    for weight in frequency_list:
        leaves.append(Node(weight))
    return leaves


# Build the Huffman tree
def create_huffman_tree(nodes):
    """Link *nodes* into a Huffman tree and return the root.

    The two lowest-frequency nodes are repeatedly merged under a new father
    node whose frequency is their sum; the lower of the two becomes the left
    child. Nodes with equal frequency are taken in the order they entered
    the queue (input order first, then merged nodes in creation order).

    The passed list is not modified, but the nodes in it are: their
    ``father`` links are set, and the returned root's ``father`` is reset
    to ``None``.

    Returns ``None`` when *nodes* is empty (there is no tree to build).
    """
    import heapq

    if not nodes:
        return None

    # Heap entries are (frequency, sequence number, node). The sequence
    # number breaks frequency ties in insertion order and guarantees that
    # two Node objects are never compared with each other.
    heap = [(node.frequency, seq, node) for seq, node in enumerate(nodes)]
    heapq.heapify(heap)
    next_seq = len(heap)

    while len(heap) > 1:
        node_left = heapq.heappop(heap)[2]
        node_right = heapq.heappop(heap)[2]
        node_father = Node(node_left.frequency + node_right.frequency)
        node_father.left = node_left
        node_father.right = node_right
        node_left.father = node_father
        node_right.father = node_father
        heapq.heappush(heap, (node_father.frequency, next_seq, node_father))
        next_seq += 1

    root = heap[0][2]
    root.father = None
    return root


# Huffman encoding
def huffman_encoding(nodes, root):
    """Return the bit string of every leaf in *nodes*, in the same order.

    Each code is read by walking from the leaf up to *root*: a step from a
    left child contributes '0', a step from a right child contributes '1'.
    The bits are collected bottom-up and then reversed, giving the
    root-to-leaf path.

    A leaf that is itself the root (a tree built from one symbol) has no
    path, which would give it the empty code and make the encoded text
    empty and undecodable. Such a leaf is given the one-bit code '0'.
    """
    huffman_code = []

    for leaf in nodes:
        if leaf is root:
            huffman_code.append('0')
            continue

        bits = []
        node = leaf
        while node is not root:
            bits.append('0' if node.is_left() else '1')
            node = node.father
        bits.reverse()
        huffman_code.append(''.join(bits))

    return huffman_code


# Encode a whole string
def encode_str(text, char_frequency, codes):
    """Return *text* encoded as a string of code bits.

    *char_frequency* is a sequence whose items carry the character at index
    0, and ``codes[i]`` is the code of ``char_frequency[i][0]``.

    Characters of *text* that have no entry in *char_frequency* contribute
    nothing to the result. If a character is listed more than once, the
    codes of all its entries are emitted in table order.

    The lookup table is built once, so each character costs one dict lookup
    instead of a scan over the whole table, and the output is assembled
    with a single join.
    """
    table = {}
    for item, code in zip(char_frequency, codes):
        char = item[0]
        table[char] = table.get(char, '') + code

    return ''.join(table.get(char, '') for char in text)


# Decode a whole string
def decode_str(huffman_str, char_frequency, codes):
    """Return the text that *huffman_str* encodes.

    *char_frequency* is a sequence whose items carry the character at index
    0, and ``codes[i]`` is the code of ``char_frequency[i][0]``. The codes
    are expected to be prefix-free.

    The input is consumed left to right in one pass: bits are accumulated
    until they form a known code, the matching character is emitted, and
    accumulation restarts.

    Raises:
        ValueError: if some part of *huffman_str* matches no code, or the
            input ends in the middle of a code. (Looping until the input
            is used up would never terminate on such input.)
    """
    table = {}
    for item, code in zip(char_frequency, codes):
        # An empty code can never consume input, so it cannot be decoded.
        if code and code not in table:
            table[code] = item[0]
    longest = max(len(code) for code in table) if table else 0

    decoded = []
    pending = ''
    for bit in huffman_str:
        pending += bit
        if pending in table:
            decoded.append(table[pending])
            pending = ''
        elif len(pending) >= longest:
            # Already as long as the longest code: no further bit can help.
            raise ValueError('cannot decode input: no code matches %r' % pending)

    if pending:
        raise ValueError('cannot decode input: incomplete code %r at end' % pending)

    return ''.join(decoded)


def main():
    """Read a line of text, Huffman-encode it, decode it again, print both."""
    # raw_input only exists on Python 2; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    text = read_line('The text to encode:')

    char_frequency = count_frequency(text)
    nodes = create_nodes([item[1] for item in char_frequency])
    root = create_huffman_tree(nodes)
    codes = huffman_encoding(nodes, root)

    huffman_str = encode_str(text, char_frequency, codes)
    origin_str = decode_str(huffman_str, char_frequency, codes)

    # A single parenthesised argument prints the same on Python 2 and 3.
    print('Encode result:' + huffman_str)
    print('Decode result:' + origin_str)


if __name__ == '__main__':
    main()
複製代碼

參考文檔:

www.cnblogs.com/tomhawk/p/7…

gist.github.com/Jackeriss/2…

arianx.me/2018/06/24/…

相關文章
相關標籤/搜索