【多叉樹】 專門處理字符串,專門爲 字典(一個詞條和一個釋義相對應)設計的數據結構;
在 字典 中,有 n 個詞條,使用 映射 方式查找,本質是使用 樹結構 ,查詢的時間複雜度是 O(log n);
使用 字典樹(Trie) 的數據結構時,查詢每個詞條的時間複雜度與 字典 中一共有多少條目無關;只與查詢字符串[單詞]的長度 w 相關,時間複雜度爲 O(w);[而大多數單詞的長度小於 10]
字典樹圖解:
每個節點有 26 (26個英文字母,根據情況可更改)個指向下個節點的指針
在不同情景下,每個節點有若干指向下個節點的指針,這是動態的數據結構
僅靠判斷節點是否是葉子節點來判斷單詞是否查詢完成是不可行的,因爲有些單詞本身就是其他單詞的一部分[pan 是 panda 的一部分],故添加 boolean 值 isWord 來判斷當前的節點是否代表一個單詞的結尾;
代碼實現:
Main.java
/** Empty entry-point stub. */
public class Main {

    /**
     * Program entry point; the body is intentionally empty.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // write your code here
    }
}
Trie.java
import java.util.TreeMap;

/**
 * Trie (prefix tree) storing a set of words, one character per edge.
 */
public class Trie {

    /** One node of the trie. */
    private class Node {

        /** True when the path from the root to this node spells a stored word. */
        public boolean isWord;

        /** Children of this node, keyed by the character on the connecting edge. */
        public TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private Node root;
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /** Returns the number of distinct words stored in the trie. */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie; adding a word that is already present
     * leaves the size unchanged.
     *
     * @param word the word to store
     */
    public void add(String word) {
        Node node = root;
        for (char ch : word.toCharArray()) {
            Node child = node.next.get(ch);
            if (child == null) {
                // No edge for this character yet: grow the branch.
                child = new Node();
                node.next.put(ch, child);
            }
            node = child;
        }

        // Only count the word the first time its end node is marked.
        if (node.isWord) {
            return;
        }
        node.isWord = true;
        size++;
    }
}
代碼實現:
Trie.java
import java.util.TreeMap;

/**
 * Trie (prefix tree) storing a set of words and supporting exact-word lookup.
 */
public class Trie {

    /** One node of the trie. */
    private class Node {

        /** True when the path from the root to this node spells a stored word. */
        public boolean isWord;

        /** Children of this node, keyed by the character on the connecting edge. */
        public TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private Node root;
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /** Returns the number of distinct words stored in the trie. */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie; re-adding an existing word does not change the size.
     *
     * @param word the word to store
     */
    public void add(String word) {
        Node node = root;
        int pos = 0;
        while (pos < word.length()) {
            char ch = word.charAt(pos++);
            if (!node.next.containsKey(ch)) {
                node.next.put(ch, new Node());
            }
            node = node.next.get(ch);
        }

        boolean alreadyStored = node.isWord;
        if (!alreadyStored) {
            node.isWord = true;
            size++;
        }
    }

    /**
     * Reports whether {@code word} was previously added.
     *
     * @param word the word to look up
     * @return true only if the whole word is stored, not merely a prefix of a stored word
     */
    public boolean contains(String word) {
        Node node = root;
        for (char ch : word.toCharArray()) {
            node = node.next.get(ch);
            if (node == null) {
                // No edge for this character: the word cannot be present.
                return false;
            }
        }
        // Reaching a node is not enough; it must be marked as a word end.
        return node.isWord;
    }
}
測試 Trie 與 BSTSet(基於二分搜索樹的集合)的耗時對比
BST.java
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;

/**
 * Binary search tree of distinct comparable elements.
 *
 * <p>Adding an element that compares equal to an existing one is a no-op.
 * The traversal methods print one element per line to standard output.
 *
 * @param <E> element type
 */
public class BST<E extends Comparable<E>> {

    /** One tree node holding an element and links to its two subtrees. */
    private class Node {
        public E e;
        public Node left, right;

        public Node(E e) {
            this.e = e;
            left = null;
            right = null;
        }
    }

    private Node root;
    private int size;

    /** Creates an empty tree. */
    public BST() {
        root = null;
        size = 0;
    }

    /** Returns the number of elements in the tree. */
    public int size() {
        return size;
    }

    /** Returns true when the tree holds no elements. */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Adds {@code e} to the tree; does nothing if an equal element is already present.
     *
     * @param e element to add
     */
    public void add(E e) {
        root = add(root, e);
    }

    /**
     * Recursively inserts {@code e} into the subtree rooted at {@code node}.
     *
     * @return the root of the subtree after the insertion
     */
    private Node add(Node node, E e) {
        if (node == null) {
            size++;
            return new Node(e);
        }

        if (e.compareTo(node.e) < 0)
            node.left = add(node.left, e);
        else if (e.compareTo(node.e) > 0)
            node.right = add(node.right, e);

        return node;
    }

    /**
     * Reports whether the tree contains an element equal to {@code e}.
     *
     * @param e element to look for
     */
    public boolean contains(E e) {
        return contains(root, e);
    }

    /** Recursively searches the subtree rooted at {@code node} for {@code e}. */
    private boolean contains(Node node, E e) {
        if (node == null)
            return false;

        if (e.compareTo(node.e) == 0)
            return true;
        else if (e.compareTo(node.e) < 0)
            return contains(node.left, e);
        else // e.compareTo(node.e) > 0
            return contains(node.right, e);
    }

    /** Prints the elements in pre-order (node, left, right). */
    public void preOrder() {
        preOrder(root);
    }

    /** Recursive pre-order traversal of the subtree rooted at {@code node}. */
    private void preOrder(Node node) {
        if (node == null)
            return;

        System.out.println(node.e);
        preOrder(node.left);
        preOrder(node.right);
    }

    /** Prints the elements in pre-order using an explicit stack instead of recursion. */
    public void preOrderNR() {
        // An empty tree has nothing to print; pushing a null root would
        // cause a NullPointerException on the first pop.
        if (root == null)
            return;

        Stack<Node> stack = new Stack<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            Node cur = stack.pop();
            System.out.println(cur.e);

            // Push right first so that the left child is processed first.
            if (cur.right != null)
                stack.push(cur.right);
            if (cur.left != null)
                stack.push(cur.left);
        }
    }

    /** Prints the elements in in-order (left, node, right), i.e. ascending order. */
    public void inOrder() {
        inOrder(root);
    }

    /** Recursive in-order traversal of the subtree rooted at {@code node}. */
    private void inOrder(Node node) {
        if (node == null)
            return;

        inOrder(node.left);
        System.out.println(node.e);
        inOrder(node.right);
    }

    /** Prints the elements in post-order (left, right, node). */
    public void postOrder() {
        postOrder(root);
    }

    /** Recursive post-order traversal of the subtree rooted at {@code node}. */
    private void postOrder(Node node) {
        if (node == null)
            return;

        postOrder(node.left);
        postOrder(node.right);
        System.out.println(node.e);
    }

    /** Prints the elements level by level, from the root downwards. */
    public void levelOrder() {
        // An empty tree has nothing to print; enqueuing a null root would
        // cause a NullPointerException on the first remove.
        if (root == null)
            return;

        Queue<Node> q = new LinkedList<>();
        q.add(root);
        while (!q.isEmpty()) {
            Node cur = q.remove();
            System.out.println(cur.e);

            if (cur.left != null)
                q.add(cur.left);
            if (cur.right != null)
                q.add(cur.right);
        }
    }

    /**
     * Returns the smallest element.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E minimum() {
        if (size == 0)
            throw new IllegalArgumentException("BST is empty!");

        return minimum(root).e;
    }

    /** Returns the node holding the smallest element of the subtree rooted at {@code node}. */
    private Node minimum(Node node) {
        if (node.left == null)
            return node;
        return minimum(node.left);
    }

    /**
     * Returns the largest element.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E maximum() {
        if (size == 0)
            throw new IllegalArgumentException("BST is empty");

        return maximum(root).e;
    }

    /** Returns the node holding the largest element of the subtree rooted at {@code node}. */
    private Node maximum(Node node) {
        if (node.right == null)
            return node;
        return maximum(node.right);
    }

    /**
     * Removes and returns the smallest element.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E removeMin() {
        E ret = minimum();
        root = removeMin(root);
        return ret;
    }

    /**
     * Removes the smallest node of the subtree rooted at {@code node}.
     *
     * @return the root of the subtree after the removal
     */
    private Node removeMin(Node node) {
        if (node.left == null) {
            Node rightNode = node.right;
            node.right = null;
            size--;
            return rightNode;
        }

        node.left = removeMin(node.left);
        return node;
    }

    /**
     * Removes and returns the largest element.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E removeMax() {
        E ret = maximum();
        root = removeMax(root);
        return ret;
    }

    /**
     * Removes the largest node of the subtree rooted at {@code node}.
     *
     * @return the root of the subtree after the removal
     */
    private Node removeMax(Node node) {
        if (node.right == null) {
            Node leftNode = node.left;
            node.left = null;
            size--;
            return leftNode;
        }

        node.right = removeMax(node.right);
        return node;
    }

    /**
     * Removes the element equal to {@code e}, if present.
     *
     * @param e element to remove
     */
    public void remove(E e) {
        root = remove(root, e);
    }

    /**
     * Recursively removes {@code e} from the subtree rooted at {@code node}.
     *
     * @return the root of the subtree after the removal
     */
    private Node remove(Node node, E e) {
        if (node == null)
            return null;

        if (e.compareTo(node.e) < 0) {
            node.left = remove(node.left, e);
            return node;
        }
        else if (e.compareTo(node.e) > 0) {
            node.right = remove(node.right, e);
            return node;
        }
        else { // e.compareTo(node.e) == 0

            // Node to delete has no left subtree: its right subtree takes its place.
            if (node.left == null) {
                Node rightNode = node.right;
                node.right = null;
                size--;
                return rightNode;
            }

            // Node to delete has no right subtree: its left subtree takes its place.
            if (node.right == null) {
                Node leftNode = node.left;
                node.left = null;
                size--;
                return leftNode;
            }

            // Both subtrees exist: replace the node with its in-order successor,
            // i.e. the smallest node of the right subtree. removeMin performs the
            // size decrement for this removal.
            Node successor = minimum(node.right);
            successor.right = removeMin(node.right);
            successor.left = node.left;

            node.left = node.right = null;

            return successor;
        }
    }

    /** Returns a pre-order, depth-indented listing of the tree, with "null" for empty links. */
    @Override
    public String toString() {
        StringBuilder res = new StringBuilder();
        generateBSTString(root, 0, res);
        return res.toString();
    }

    /** Appends the description of the subtree rooted at {@code node}, located at {@code depth}. */
    private void generateBSTString(Node node, int depth, StringBuilder res) {
        if (node == null) {
            res.append(generateDepthString(depth) + "null\n");
            return;
        }

        res.append(generateDepthString(depth) + node.e + "\n");
        generateBSTString(node.left, depth + 1, res);
        generateBSTString(node.right, depth + 1, res);
    }

    /** Builds the "--" indentation prefix for the given depth. */
    private String generateDepthString(int depth) {
        StringBuilder res = new StringBuilder();
        for (int i = 0; i < depth; i++)
            res.append("--");
        return res.toString();
    }
}
BSTSet.java
/**
 * {@link Set} implementation that delegates every operation to a {@link BST}.
 *
 * @param <E> element type
 */
public class BSTSet<E extends Comparable<E>> implements Set<E> {

    /** Backing tree that stores the elements. */
    private BST<E> bst;

    /** Creates an empty set. */
    public BSTSet() {
        this.bst = new BST<>();
    }

    /** Adds {@code e} by delegating to the backing tree. */
    @Override
    public void add(E e) {
        this.bst.add(e);
    }

    /** Reports whether the backing tree contains {@code e}. */
    @Override
    public boolean contains(E e) {
        return this.bst.contains(e);
    }

    /** Removes {@code e} by delegating to the backing tree. */
    @Override
    public void remove(E e) {
        this.bst.remove(e);
    }

    /** Returns the number of elements in the backing tree. */
    @Override
    public int getSize() {
        return this.bst.size();
    }

    /** Returns true when the backing tree is empty. */
    @Override
    public boolean isEmpty() {
        return this.bst.isEmpty();
    }
}
FileOperation.java
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Scanner;

/** File-related helpers. */
public class FileOperation {

    /**
     * Reads the file named {@code filename} as UTF-8 and appends every word it
     * contains, lower-cased, to {@code words}.
     *
     * <p>A word is a maximal run of characters for which
     * {@link Character#isLetter(char)} is true. This tokenisation is deliberately
     * simple and intended for demo purposes only.
     *
     * @param filename path of the file to read
     * @param words    list that receives the words in file order
     * @return true if the file was read; false if an argument is null, the file
     *         does not exist, or it cannot be opened
     */
    public static boolean readFile(String filename, ArrayList<String> words) {

        if (filename == null || words == null) {
            System.out.println("filename is null or words is null");
            return false;
        }

        File file = new File(filename);
        if (!file.exists())
            return false;

        // try-with-resources guarantees that the stream and scanner are closed
        // on every path, including early returns and failures.
        String contents;
        try (FileInputStream fis = new FileInputStream(file);
             Scanner scanner = new Scanner(new BufferedInputStream(fis), "UTF-8")) {
            scanner.useLocale(Locale.ENGLISH);

            // Nothing to tokenise in an empty file.
            if (!scanner.hasNextLine())
                return true;

            // The "\\A" delimiter makes next() return the whole remaining input.
            contents = scanner.useDelimiter("\\A").next();
        }
        catch (IOException ioe) {
            System.out.println("Cannot open " + filename);
            return false;
        }

        int start = firstCharacterIndex(contents, 0);
        while (start < contents.length()) {
            // Extend the word until the first non-letter or the end of input.
            int end = start + 1;
            while (end < contents.length() && Character.isLetter(contents.charAt(end)))
                end++;

            // Locale.ROOT keeps the lower-casing independent of the platform locale.
            words.add(contents.substring(start, end).toLowerCase(Locale.ROOT));
            start = firstCharacterIndex(contents, end);
        }

        return true;
    }

    /**
     * Returns the index of the first letter in {@code s} at or after {@code start},
     * or {@code s.length()} if there is none.
     */
    private static int firstCharacterIndex(String s, int start) {
        for (int i = start; i < s.length(); i++)
            if (Character.isLetter(s.charAt(i)))
                return i;
        return s.length();
    }
}
Set.java
/**
 * Minimal set abstraction.
 *
 * @param <E> element type
 */
public interface Set<E> {

    /** Adds {@code e} to the set. */
    void add(E e);

    /** Removes {@code e} from the set. */
    void remove(E e);

    /** Reports whether the set contains {@code e}. */
    boolean contains(E e);

    /** Returns the number of elements in the set. */
    int getSize();

    /** Reports whether the set has no elements. */
    boolean isEmpty();
}
Main.java
import java.util.ArrayList; public class Main { public static void main(String[] args) { System.out.println("Pride and Prejudice"); ArrayList<String> words = new ArrayList<>(); //使用動態數組的數據結構 if(FileOperation.readFile("pride-and-prejudice.txt", words)){ long startTime = System.nanoTime(); //開始計時 BSTSet<String> set = new BSTSet<>(); for(String word: words) set.add(word); //將 word 添加到 set 中 for(String word: words) set.contains(word);//查詢 set 中是否包含 word long endTime = System.nanoTime();//結束計時 double time = (endTime - startTime) / 1000000000.0;//納秒轉換爲秒 System.out.println("Total different words: " + set.getSize()); System.out.println("BSTSet: " + time + " s"); // --- startTime = System.nanoTime(); Trie trie = new Trie(); //使用字典樹的數據結構 for(String word: words) trie.add(word); for(String word: words) trie.contains(word); endTime = System.nanoTime(); time = (endTime - startTime) / 1000000000.0; System.out.println("Total different words: " + trie.getSize()); System.out.println("Trie: " + time + " s"); } } }
輸出:二者相差不大,使用 字典樹 與所查詢的文本大小無關,與所要查詢字符的長度有關,長度越小,所在的文本越大,使用字典樹就更有優勢。
在 Trie 中搜索一個單詞的過程中,在一個支路上所經過的字符串都是目標單詞的前綴,通過這種數據結構,可以快速地查看在
當前存儲的所有單詞中,是否有以某一個前綴開頭的單詞
代碼實現:
Trie.java
import java.util.TreeMap;

/**
 * Trie (prefix tree) storing a set of words, with exact-word and prefix queries.
 *
 * <p>Words can only be added, never removed, so every node other than the root
 * lies on the path of at least one stored word.
 */
public class Trie {

    /** One node of the trie; static because it never needs the enclosing instance. */
    private static class Node {

        /** True when the path from the root to this node spells a stored word. */
        public boolean isWord;

        /** Children of this node, keyed by the character on the connecting edge. */
        public TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private Node root;
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /** Returns the number of distinct words stored in the trie. */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie; re-adding an existing word does not change the size.
     *
     * @param word the word to store
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Look the child up once and reuse the result instead of calling get() repeatedly.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }

        // Only count the word the first time its end node is marked.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }

    /**
     * Reports whether {@code word} was previously added.
     *
     * @param word the word to look up
     * @return true only if the whole word is stored, not merely a prefix of a stored word
     */
    public boolean contains(String word) {
        Node end = find(word);
        return end != null && end.isWord;
    }

    /**
     * Reports whether at least one stored word starts with {@code prefix}.
     * A word counts as a prefix of itself, and the empty string is a prefix of
     * every stored word.
     *
     * @param prefix the prefix to test
     * @return true if some stored word starts with {@code prefix}; always false
     *         for an empty trie
     */
    public boolean isPrefix(String prefix) {
        Node end = find(prefix);
        if (end == null) {
            return false;
        }
        // Any non-root node was created by add(), so a stored word passes through it.
        // The root exists even in an empty trie, so it only counts once a word is stored.
        return end != root || size > 0;
    }

    /**
     * Follows the characters of {@code s} from the root.
     *
     * @return the node reached, or null if some character has no matching edge
     */
    private Node find(String s) {
        Node cur = root;
        for (int i = 0; i < s.length() && cur != null; i++) {
            cur = cur.next.get(s.charAt(i));
        }
        return cur;
    }
}
字典樹習題:習題鏈接
代碼實現:
import java.util.TreeMap;

/**
 * Trie (prefix tree) with insert, exact search and prefix search.
 */
public class Trie {

    /** One node of the trie. */
    private class Node {

        /** True when the path from the root to this node spells an inserted word. */
        public boolean isWord;

        /** Children of this node, keyed by the character on the connecting edge. */
        public TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private Node root;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
    }

    /**
     * Inserts {@code word} into the trie.
     *
     * @param word the word to store
     */
    public void insert(String word) {
        Node node = root;
        for (char ch : word.toCharArray()) {
            Node child = node.next.get(ch);
            if (child == null) {
                child = new Node();
                node.next.put(ch, child);
            }
            node = child;
        }
        node.isWord = true;
    }

    /**
     * Reports whether {@code word} was previously inserted.
     *
     * @param word the word to look up
     */
    public boolean search(String word) {
        Node node = root;
        for (char ch : word.toCharArray()) {
            node = node.next.get(ch);
            if (node == null) {
                return false;
            }
        }
        return node.isWord;
    }

    /**
     * Reports whether the characters of {@code isPrefix} can be followed from the root.
     *
     * @param isPrefix the prefix to test
     */
    public boolean startsWith(String isPrefix) {
        Node node = root;
        for (char ch : isPrefix.toCharArray()) {
            node = node.next.get(ch);
            if (node == null) {
                return false;
            }
        }
        // Unlike search(), reaching the node is sufficient.
        return true;
    }
}
輸出:
題2 習題鏈接
代碼實現:
/// Leetcode 211. Add and Search Word - Data structure design
/// https://leetcode.com/problems/add-and-search-word-data-structure-design/description/

import java.util.TreeMap;

/**
 * Word dictionary backed by a trie; searches may use '.' as a wildcard that
 * matches any single stored character.
 */
public class WordDictionary {

    /** One node of the trie. */
    private class Node {

        /** True when the path from the root to this node spells an added word. */
        public boolean isWord;

        /** Children of this node, keyed by the character on the connecting edge. */
        public TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private Node root;

    /** Initialize your data structure here. */
    public WordDictionary() {
        root = new Node();
    }

    /** Adds a word into the data structure. */
    public void addWord(String word) {
        Node node = root;
        for (char ch : word.toCharArray()) {
            Node child = node.next.get(ch);
            if (child == null) {
                child = new Node();
                node.next.put(ch, child);
            }
            node = child;
        }
        node.isWord = true;
    }

    /** Returns if the word is in the data structure. A word could contain the dot character '.' to represent any one letter. */
    public boolean search(String word) {
        return match(root, word, 0);
    }

    /**
     * Matches {@code word} from position {@code index} onwards against the
     * subtree rooted at {@code node}.
     */
    private boolean match(Node node, String word, int index) {
        // Pattern exhausted: it matches only if this node ends a word.
        if (index == word.length()) {
            return node.isWord;
        }

        char ch = word.charAt(index);
        if (ch == '.') {
            // Wildcard: succeed if any child can match the rest of the pattern.
            for (Node child : node.next.values()) {
                if (match(child, word, index + 1)) {
                    return true;
                }
            }
            return false;
        }

        Node child = node.next.get(ch);
        return child != null && match(child, word, index + 1);
    }
}
輸出:
題3 習題鏈接
代碼實現:
import java.util.TreeMap;

/**
 * Map from string keys to integer values, backed by a trie, that can sum the
 * values of all keys sharing a prefix.
 */
public class MapSum {

    /** One node of the trie. */
    private class Node {

        /** Value of the key ending at this node; 0 unless set by insert. */
        public int value;

        /** Children of this node, keyed by the character on the connecting edge. */
        public TreeMap<Character, Node> next;

        public Node(int value) {
            this.value = value;
            next = new TreeMap<>();
        }

        public Node() {
            this(0);
        }
    }

    private Node root;

    /** Initialize your data structure here. */
    public MapSum() {
        root = new Node();
    }

    /**
     * Associates {@code val} with {@code key}, replacing any previous value.
     *
     * @param key the key to store
     * @param val the value to associate with the key
     */
    public void insert(String key, int val) {
        Node node = root;
        for (char ch : key.toCharArray()) {
            Node child = node.next.get(ch);
            if (child == null) {
                child = new Node();
                node.next.put(ch, child);
            }
            node = child;
        }
        node.value = val;
    }

    /**
     * Returns the sum of the values of all keys that start with {@code prefix},
     * or 0 if no key has that prefix.
     *
     * @param prefix the prefix to sum under
     */
    public int sum(String prefix) {
        Node node = root;
        for (char ch : prefix.toCharArray()) {
            node = node.next.get(ch);
            if (node == null) {
                return 0;
            }
        }
        return sum(node);
    }

    /** Adds up the values stored in {@code node} and in every node below it. */
    private int sum(Node node) {
        int total = node.value;
        for (Node child : node.next.values()) {
            total += sum(child);
        }
        return total;
    }
}
輸出:
補充:
1.Trie 的刪除操作
刪除 deer :當搜索到 deer 最後一個字母的時候,自底向上地刪除即可,每一個節點如果其對應的 next 爲空則相應的都可以刪除
在含有 panda 的 Trie 中刪除 pan ,n 並不是葉子節點,將 n 節點的 isWord 設爲 false 即可
2.Trie 的侷限性
最大的問題:空間
即使是 26 個字母表這樣的字符空間,TreeMap 也要存儲 26 條記錄,存儲空間是原來的 27 倍,空間消耗巨大;爲解決該問題,提出了 壓縮字典樹 (Compressed Trie)[維護成本高]
另一個解決方案:三分搜索樹【只有 3 個孩子,佔用空間小 ,所費時間略微多了,與所查找字符串長度成正比】