字典樹（Trie tree）

時間 2020-01-25

標籤字典 trie tree 简体版

原文原文鏈接

Trie，又稱單詞查找樹或鍵樹，是一種樹形結構，是一種哈希樹的變種。典型應用是用於統計和排序大量的字符串（但不只限於字符串），因此常常被搜索引擎系統用於文本詞頻統計。它的優勢是：最大限度地減少無謂的字符串比較，查詢效率比哈希表高。

性質

它有3個基本性質：

根節點不包含字符，除根節點外每個節點都只包含一個字符。
從根節點到某一節點，路徑上經過的字符連接起來，即爲該節點對應的字符串。
每一個節點的所有子節點包含的字符都不相同。

圖示

這是一個Trie結構的例子：

在這個Trie結構中，保存了A、to、tea、ted、ten、i、in、inn這8個字符串，僅佔用10個字節（每個非根節點存一個字符，不包括指針佔用的空間）。

實例

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
/* Number of child slots per node: one for every possible byte value. */
#define TREE_WIDTH 256

/* Longest word, in bytes, that the traversal buffer is sized for. */
#define WORDLENMAX 128

/*
 * One trie node.  The byte that leads to a node is not stored in the node
 * itself; it is the index of the slot in the parent's next[] array.
 */
struct trie_node_st {
        int count;                              /* occurrences of the word ending here */
        struct trie_node_st *next[TREE_WIDTH];  /* children, indexed by byte value */
};

/* Root of the trie; static storage starts with count 0 and every child NULL. */
static struct trie_node_st root;

/* Delimiter set handed to strsep() when a line is split into words. */
static char *spaces = " \t\n/.\"'()";
 
/*
 * insert - record one occurrence of a word in the trie rooted at `root`.
 *
 * @word: NUL-terminated byte string; an empty string is ignored.
 *
 * Walks the trie one byte at a time, creating any missing node on the way,
 * then increments the counter of the node reached at the end of the word.
 *
 * Returns 0 on success (including the empty-string case) and -1 if a node
 * could not be allocated; in that case no counter is changed.
 */
static int
insert(const char *word)
{
        struct trie_node_st *curr = &root;
        const unsigned char *p;

        if (word[0] == '\0') {
                return 0;
        }
        /*
         * Index with unsigned char: plain char may be signed, which would
         * turn bytes >= 0x80 into negative subscripts of next[].
         */
        for (p = (const unsigned char *)word; *p != '\0'; ++p) {
                if (curr->next[*p] == NULL) {
                        /* calloc zero-fills the node: count 0, no children */
                        struct trie_node_st *newnode = calloc(1, sizeof *newnode);

                        if (newnode == NULL) {
                                return -1;
                        }
                        curr->next[*p] = newnode;
                }
                curr = curr->next[*p];
        }
        /* curr is now the node for the last byte of the word */
        curr->count++;

        return 0;
}
 
/*
 * printword - write one "<word>\t<count>\n" line to standard output.
 *
 * @str: the word to print.
 * @n:   the count printed after the tab.
 */
static void
printword(const char *str, int n)
{
        (void)fprintf(stdout, "%s\t%d\n", str, n);
}
 
/*
 * do_travel - depth-first walk that prints every stored word with its count.
 *
 * @rootp: subtree to walk; NULL is accepted and ignored.
 *
 * Children are visited in ascending byte order.  The path from the node the
 * walk started at is rebuilt in a static buffer shared by all recursion
 * levels, so the function is not reentrant; the buffer position is restored
 * to its entry value before each call returns.
 *
 * The buffer holds at most WORDLENMAX bytes plus the terminator.  Subtrees
 * deeper than that are not visited, so words longer than WORDLENMAX bytes
 * are omitted from the output instead of being written past the buffer end.
 *
 * Always returns 0.
 */
static int
do_travel(struct trie_node_st *rootp)
{
        static char worddump[WORDLENMAX+1];
        static int pos=0;
        int i;

        if (rootp == NULL) {
                return 0;
        }
        if (rootp->count) {
                worddump[pos]='\0';
                printword(worddump, rootp->count);
        }
        if (pos >= WORDLENMAX) {
                /* slot WORDLENMAX is reserved for the terminator: stop here */
                return 0;
        }
        for (i=0;i<TREE_WIDTH;++i) {
                if (rootp->next[i] == NULL) {
                        continue;
                }
                worddump[pos++]=(char)i;
                do_travel(rootp->next[i]);
                pos--;
        }
        return 0;
}
 
/*
 * main - count word frequencies on standard input.
 *
 * Reads stdin line by line, splits each line on the delimiter set `spaces`,
 * inserts every non-empty token into the trie, and finally prints each
 * distinct word with its count via do_travel().
 *
 * Returns 0 on success, or EXIT_FAILURE if insert() reports a failure (in
 * which case nothing is printed to stdout).
 */
int
main(void)
{
        char *linebuf=NULL, *line, *word;
        size_t bufsize=0;
        int status=0;

        /*
         * getline() yields -1 at end of input or on error.  Testing the
         * result directly avoids narrowing its ssize_t value into an int.
         */
        while (status == 0 && getline(&linebuf, &bufsize, stdin) != -1) {
                line=linebuf;
                /* strsep() returns NULL once the whole line has been consumed */
                while ((word = strsep(&line, spaces)) != NULL) {
                        if (word[0]=='\0') {
                                /* adjacent delimiters produce empty tokens */
                                continue;
                        }
                        if (insert(word) != 0) {
                                fprintf(stderr, "failed to store word\n");
                                status=EXIT_FAILURE;
                                break;
                        }
                }
        }

        /* getline() allocated (and possibly grew) this buffer; free(NULL) is fine */
        free(linebuf);

        if (status != 0) {
                return status;
        }

        do_travel(&root);

        return 0;
}

再給一個例子：

// Fan-out of every node: one child slot per letter 'a'..'z'.
#define MAX_NUM 26

// Marks whether the path from the root to a node spells a stored string.
enum NODE_TYPE {
  COMPLETED,    // a stored string ends at this node
  UNCOMPLETED   // the node is only part of a longer string
};

// One node of the 26-way tree.
struct Node {
  enum NODE_TYPE type;           // COMPLETED once insert() has ended a string here
  char ch;                       // character this node was created for
  struct Node *child[MAX_NUM];   // children, indexed by charToindex(): a, b, c, ... z
};

// Root of the tree; assigned by initialization().
struct Node *ROOT;
 
// Allocate a tree node for character `ch`, with no children and marked
// UNCOMPLETED (no string ends at it yet).
// Returns the new node, or NULL if the allocation fails.
struct Node* createNewNode(char ch){
  struct Node *new_node = malloc(sizeof *new_node);
  int i;

  if (new_node == NULL)
    return NULL;

  new_node->ch = ch;
  // Must be an assignment: a comparison here would leave `type` holding
  // whatever malloc returned, and lookups read that field.
  new_node->type = UNCOMPLETED;
  for(i = 0; i < MAX_NUM; i++)
    new_node->child[i] = NULL;
  return new_node;
}
 
// Initialization: create an empty tree whose ROOT is a single node made
// for the character ' '.
void initialization() {
  struct Node *root_node = createNewNode(' ');
  ROOT = root_node;
}
 
// Map a character to a child-slot index: its offset from 'a'
// ('a' -> 0, 'b' -> 1, ...).  No range check is performed here.
int charToindex(char ch) {
  const int base = 'a';
  return ch - base;
}
 
int find(const char chars[], int len) {
  struct Node* ptr = ROOT;
  int i = 0;
  while(i < len) {
   if(ptr->child[charToindex(chars[i])] == NULL) {
   break;
  }
  ptr = ptr->child[charToindex(chars[i])];
  i++;
  }
  return (i == len) && (ptr->type == COMPLETED);
}
 
void insert(const char chars[], int len) {
  struct Node* ptr = ROOT;
  int i;
  for(i = 0; i < len; i++) {
   if(ptr->child[charToindex(chars[i])] == NULL) {
    ptr->child[charToindex(chars[i])] = createNewNode(chars[i]);
  }
  ptr = ptr->child[charToindex(chars[i])];
}
  ptr->type = COMPLETED;
}

Trie樹的基本實現

字母樹的插入（Insert）、刪除（Delete）和查找（Find）都非常簡單，用一個一重循環即可，即第 i 次循環找到前 i 個字母所對應的子樹，然後進行相應的操作。實現這棵字母樹，我們用最常見的數組保存（靜態開闢內存）即可，當然也可以開動態的指針類型（動態開闢內存）。至於結點對兒子的指向，一般有三種方法：

一、對每一個結點開一個字母集大小的數組，對應的下標是兒子所表示的字母，內容則是這個兒子對應在大數組上的位置，即標號；

二、對每一個結點掛一個鏈表，按一定順序記錄每一個兒子是誰；

三、使用左兒子右兄弟表示法記錄這棵樹。

三種方法，各有特點。第一種易實現，但實際的空間要求較大；第二種，較易實現，空間要求相對較小，但比較費時；第三種，空間要求最小，但相對費時且不易寫。

三、 Trie樹的高級實現

可以採用雙數組（Double-Array）實現。利用雙數組可以大大減少內存使用量，具體實現細節見參考資料（5）（6）。

四、 Trie樹的應用

Trie是一種非常簡單高效的數據結構，但有大量的應用實例。

（1）字符串檢索

事先將已知的一些字符串（字典）的有關信息保存到trie樹裏，查找另外一些未知字符串是否出現過或者出現頻率。

舉例：

@ 給出 N 個單詞組成的熟詞表，以及一篇全用小寫英文書寫的文章，請你按最早出現的順序寫出所有不在熟詞表中的生詞。