CharMatcher提供了多種對字符串處理的方法, 它的主要意圖有:
1. 找到匹配的字符
2. 處理匹配的字符
CharMatcher內部主要實現包括兩部分:
1. 實現了大量公用內部類, 用來方便用戶對字符串做匹配: 例如 JAVA_DIGIT 匹配數字, JAVA_LETTER 匹配字母等等.
2. 實現了大量處理字符串的方法, 使用特定的CharMatcher可以對匹配到的字符做出多種處理, 例如 removeFrom(), replaceFrom(), trimFrom(), retainFrom()等等
CharMatcher本身是一個抽象類, 其中一些操作方法是抽象方法, 它主要依靠內部繼承CharMatcher的內部子類來實現抽象方法和重寫一些操作方法, 因為不同的匹配規則對這些操作方法具有不同的實現要求
CharMatcher本身提供了很多CharMatcher實現類, 如下:
ANY: 匹配任何字符
ASCII: 匹配ASCII字符
BREAKING_WHITESPACE: 匹配全部可換行的空白字符(不包括非換行空白字符,例如"\u00a0")
DIGIT: 匹配ASCII數字
INVISIBLE: 匹配全部看不見的字符
JAVA_DIGIT: 匹配UNICODE數字, 使用 Character.isDigit() 實現
JAVA_ISO_CONTROL: 匹配ISO控制字符, 使用 Character.isISOControl() 實現
JAVA_LETTER: 匹配字母, 使用 Character.isLetter() 實現
JAVA_LETTER_OR_DIGIT: 匹配數字或字母
JAVA_LOWER_CASE: 匹配小寫
JAVA_UPPER_CASE: 匹配大寫
NONE: 不匹配任何字符
SINGLE_WIDTH: 匹配單字寬字符, 如中文字就是雙字寬
WHITESPACE: 匹配全部空白字符
CharMatcher is(char match): 返回匹配指定字符的Matcher
CharMatcher isNot(char match): 返回不匹配指定字符的Matcher
CharMatcher anyOf(CharSequence sequence): 返回匹配sequence中任意字符的Matcher
CharMatcher noneOf(CharSequence sequence): 返回不匹配sequence中任何一個字符的Matcher
CharMatcher inRange(char startInclusive, char endInclusive): 返回匹配範圍內任意字符的Matcher
CharMatcher forPredicate(Predicate<? super Character> predicate): 返回使用predicate的apply()判斷匹配的Matcher
CharMatcher negate(): 返回以當前Matcher判斷規則相反的Matcher
CharMatcher and(CharMatcher other): 返回將當前Matcher與other的匹配條件做「與」組合來判斷的Matcher
CharMatcher or(CharMatcher other): 返回將當前Matcher與other的匹配條件做「或」組合來判斷的Matcher
boolean matchesAnyOf(CharSequence sequence): 只要sequence中有任意字符能匹配Matcher,返回true
boolean matchesAllOf(CharSequence sequence): sequence中全部字符都能匹配Matcher,返回true
boolean matchesNoneOf(CharSequence sequence): sequence中全部字符都不能匹配Matcher,返回true
int indexIn(CharSequence sequence): 返回sequence中匹配到的第一個字符的座標
int indexIn(CharSequence sequence, int start): 返回從start開始,在sequence中匹配到的第一個字符的座標
int lastIndexIn(CharSequence sequence): 返回sequence中最後一次匹配到的字符的座標
int countIn(CharSequence sequence): 返回sequence中匹配到的字符計數
String removeFrom(CharSequence sequence): 刪除sequence中匹配到的字符並返回
String retainFrom(CharSequence sequence): 保留sequence中匹配到的字符並返回
String replaceFrom(CharSequence sequence, char replacement): 替換sequence中匹配到的字符並返回
String trimFrom(CharSequence sequence): 刪除首尾匹配到的字符並返回
String trimLeadingFrom(CharSequence sequence): 刪除首部匹配到的字符
String trimTrailingFrom(CharSequence sequence): 刪除尾部匹配到的字符
String collapseFrom(CharSequence sequence, char replacement): 將匹配到的組(連續匹配的字符)替換成replacement
String trimAndCollapseFrom(CharSequence sequence, char replacement): 先trim再collapse
下面對CharMatcher常用操作方法的實現做一些介紹
/** * Returns a matcher that matches exactly the characters this matcher does not match. * The returned matcher wraps this one: matches(c) inverts the result of this matcher, matchesAllOf and matchesNoneOf delegate to matchesNoneOf and matchesAllOf of this matcher respectively, countIn is sequence.length() minus the countIn of this matcher, and negate() hands back this original matcher instead of wrapping a second time. * * @return the negated matcher, constructed with the string form of this matcher followed by .negate() as its description */ public CharMatcher negate() { final CharMatcher original = this; return new CharMatcher(original + ".negate()") { @Override public boolean matches(char c) { return !original.matches(c); } @Override public boolean matchesAllOf(CharSequence sequence) { return original.matchesNoneOf(sequence); } @Override public boolean matchesNoneOf(CharSequence sequence) { return original.matchesAllOf(sequence); } @Override public int countIn(CharSequence sequence) { return sequence.length() - original.countIn(sequence); } @Override public CharMatcher negate() { return original; } }; } /** * Returns a matcher that matches a character only if both this matcher and other match it. * * @param other the second matcher; it is passed through checkNotNull before use * @return a new And combining this matcher with other */ public CharMatcher and(CharMatcher other) { return new And(this, checkNotNull(other)); } /** * Matcher that combines two matchers with logical AND. * Translated author note: And is implemented the same way as the Compound of Ordering; a nested subclass extends the matcher and holds the combined matchers by composition, and an operation simply invokes the same-named operation on each of them in turn. * Here matches(c) evaluates first.matches(c) && second.matches(c), so second is consulted only when first matches. * The two-argument constructor builds a description of the form CharMatcher.and(a, b); withToString returns a new And over the same operands with the given description. */ private static class And extends CharMatcher { final CharMatcher first; final CharMatcher second; And(CharMatcher a, CharMatcher b) { this(a, b, "CharMatcher.and(" + a + ", " + b + ")"); } And(CharMatcher a, CharMatcher b, String description) { super(description); first = checkNotNull(a); second = checkNotNull(b); } @Override public CharMatcher and(CharMatcher other) { return new And(this, other); } @Override public boolean matches(char c) { return first.matches(c) && second.matches(c); } @Override CharMatcher withToString(String description) { return new And(first, second, description); } } /** * Returns a matcher that matches a character if either this matcher or other matches it. * Translated author note: Or is implemented like And, so it is not described again. * * @param other the second matcher; it is passed through checkNotNull before use * @return a new Or combining this matcher with other */ public CharMatcher or(CharMatcher other) { return new Or(this, checkNotNull(other)); } /** * Matcher that combines two matchers with logical OR: matches(c) evaluates first.matches(c) || second.matches(c), so second is consulted only when first does not match. */ private static class Or extends CharMatcher { final CharMatcher first; final CharMatcher second; Or(CharMatcher a, CharMatcher b, String description) { super(description); first = checkNotNull(a); second = checkNotNull(b); } Or(CharMatcher a, CharMatcher b) { this(a, b, "CharMatcher.or(" + a + ", " + b + ")"); } @Override public CharMatcher or(CharMatcher other) { return new Or(this, checkNotNull(other)); } @Override public 
boolean matches(char c) { return first.matches(c) || second.matches(c); } @Override CharMatcher withToString(String description) { return new Or(first, second, description); } } /** * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to * query than the original; your mileage may vary. Precomputation takes time and is likely to be * worthwhile only if the precomputed matcher is queried many thousands of times. * * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a * worthwhile tradeoff in a browser. */ public CharMatcher precomputed() { return Platform.precomputeCharMatcher(this); } /** * Collects every char value that this matcher matches by brute force: each value from Character.MIN_VALUE to Character.MAX_VALUE is tested in turn (the author note calls this the slowest possible way). * The loop counter is an int rather than a char so that it can step past Character.MAX_VALUE and the loop terminates. * * @return a new array holding the matched chars in ascending order, sized exactly to the number of matches */ char[] slowGetChars() { char[] allChars = new char[65536]; int size = 0; for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++) { if (matches((char) c)) { allChars[size++] = (char) c; } } char[] retValue = new char[size]; System.arraycopy(allChars, 0, retValue, 0, size); return retValue; } /** * Returns true if at least one character of sequence matches this matcher; computed as the negation of matchesNoneOf(sequence). */ public boolean matchesAnyOf(CharSequence sequence) { return !matchesNoneOf(sequence); } /** * Returns true if every character of sequence matches this matcher (so true for an empty sequence). * The scan runs from the last index down to 0 and returns false at the first non-matching character. */ public boolean matchesAllOf(CharSequence sequence) { for (int i = sequence.length() - 1; i >= 0; i--) { if (!matches(sequence.charAt(i))) { return false; } } return true; } /** * Returns true if no character of sequence matches this matcher; computed as indexIn(sequence) == -1. */ public boolean matchesNoneOf(CharSequence sequence) { return indexIn(sequence) == -1; } /** * Returns the index of the first character in sequence that matches this matcher. * * @return the index of the first match, or -1 if no character matches */ public int indexIn(CharSequence sequence) { int length = sequence.length(); for (int i = 0; i < length; i++) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * Returns the index of the first matching character in sequence at or after start. * * @param start the index to begin searching from; it is checked with Preconditions.checkPositionIndex against the sequence length (presumably rejecting values outside 0..length; confirm against Preconditions) * @return the index of the first match at or after start, or -1 if there is none */ public int indexIn(CharSequence sequence, int start) { int length = sequence.length(); 
Preconditions.checkPositionIndex(start, length); for (int i = start; i < length; i++) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * Returns the index of the last character in sequence that matches this matcher, scanning backwards from the end. * * @return the index of the last match, or -1 if no character matches */ public int lastIndexIn(CharSequence sequence) { for (int i = sequence.length() - 1; i >= 0; i--) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * Returns how many characters of sequence match this matcher. */ public int countIn(CharSequence sequence) { int count = 0; for (int i = 0; i < sequence.length(); i++) { if (matches(sequence.charAt(i))) { count++; } } return count; } /** * Returns the characters of sequence with every matching character removed. * If nothing matches, sequence.toString() is returned as is. Otherwise the characters are compacted in place inside a char array: spread tracks how many matching characters have been skipped so far, and each non-matching character is copied spread positions to the left. */ @CheckReturnValue public String removeFrom(CharSequence sequence) { String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } char[] chars = string.toCharArray(); int spread = 1; // This unusual loop comes from extensive benchmarking // Shift-and-compact deletion, written as a nested loop that is left through the labeled break OUT OUT: while (true) { pos++; while (true) { if (pos == chars.length) { break OUT; } if (matches(chars[pos])) { break; } chars[pos - spread] = chars[pos]; pos++; } spread++; } return new String(chars, 0, pos - spread); } /** * Returns only the characters of sequence that match this matcher, in their original order. * Implemented as removeFrom on the negated matcher. */ @CheckReturnValue public String retainFrom(CharSequence sequence) { return negate().removeFrom(sequence); } /** * Returns a copy of sequence in which every matching character is replaced by replacement. * If nothing matches, sequence.toString() is returned as is. */ @CheckReturnValue public String replaceFrom(CharSequence sequence, char replacement) { String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } char[] chars = string.toCharArray(); chars[pos] = replacement; for (int i = pos + 1; i < chars.length; i++) { if (matches(chars[i])) { chars[i] = replacement; } } return new String(chars); } /** * Returns a copy of sequence in which every matching character is replaced by the replacement sequence. * Unlike the char overload, this one is built on indexIn and a StringBuilder. An empty replacement delegates to removeFrom and a one-character replacement delegates to the char overload. */ @CheckReturnValue public String replaceFrom(CharSequence sequence, CharSequence replacement) { int replacementLen = replacement.length(); if (replacementLen == 0) { return removeFrom(sequence); } if (replacementLen == 1) { return 
replaceFrom(sequence, replacement.charAt(0)); } String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } int len = string.length(); StringBuilder buf = new StringBuilder((len * 3 / 2) + 16); int oldpos = 0; do { buf.append(string, oldpos, pos); buf.append(replacement); oldpos = pos + 1; pos = indexIn(string, oldpos); } while (pos != -1); buf.append(string, oldpos, len); return buf.toString(); } /** * Returns sequence with all matching characters stripped from both its beginning and its end. * The leading scan stops at the first non-matching character; the trailing scan only walks down to that position, so a sequence made up entirely of matching characters yields an empty string. */ @CheckReturnValue public String trimFrom(CharSequence sequence) { int len = sequence.length(); int first; int last; for (first = 0; first < len; first++) { if (!matches(sequence.charAt(first))) { break; } } for (last = len - 1; last > first; last--) { if (!matches(sequence.charAt(last))) { break; } } return sequence.subSequence(first, last + 1).toString(); } /** * Returns sequence with all matching characters stripped from its beginning only. */ @CheckReturnValue public String trimLeadingFrom(CharSequence sequence) { int len = sequence.length(); int first; for (first = 0; first < len; first++) { if (!matches(sequence.charAt(first))) { break; } } return sequence.subSequence(first, len).toString(); } /** * Returns sequence with all matching characters stripped from its end only. */ @CheckReturnValue public String trimTrailingFrom(CharSequence sequence) { int len = sequence.length(); int last; for (last = len - 1; last >= 0; last--) { if (!matches(sequence.charAt(last))) { break; } } return sequence.subSequence(0, last + 1).toString(); } /** * Returns a copy of sequence in which each group (run of consecutive matching characters) is replaced by a single replacement character. * If nothing matches, sequence.toString() is returned. The flag named in records whether the scan is currently inside a matching group, so replacement is appended only once per group. */ @CheckReturnValue public String collapseFrom(CharSequence sequence, char replacement) { int first = indexIn(sequence); if (first == -1) { return sequence.toString(); } // TODO(kevinb): see if this implementation can be made faster StringBuilder builder = new StringBuilder(sequence.length()) .append(sequence.subSequence(0, first)) .append(replacement); boolean in = true; for (int i = first + 1; i < sequence.length(); i++) { char c = sequence.charAt(i); if (matches(c)) { if (!in) { builder.append(replacement); in = true; } } 
else { builder.append(c); in = false; } } return builder.toString(); } /** * Trims and then collapses: matching groups at the start and end of sequence are dropped, and each matching group in between is replaced by a single replacement character. * The scan starts at the first non-matching character, and replacement is appended only right before a following non-matching character, which is why a trailing group produces no output. Returns an empty string when no character is non-matching. */ @CheckReturnValue public String trimAndCollapseFrom(CharSequence sequence, char replacement) { int first = negate().indexIn(sequence); if (first == -1) { return ""; // everything matches. nothing's left. } StringBuilder builder = new StringBuilder(sequence.length()); boolean inMatchingGroup = false; for (int i = first; i < sequence.length(); i++) { char c = sequence.charAt(i); if (matches(c)) { inMatchingGroup = true; } else { if (inMatchingGroup) { builder.append(replacement); inMatchingGroup = false; } builder.append(c); } } return builder.toString(); } // Predicate interface /** * Alias for matches(char): the Character argument is unboxed and handed to matches, so a null argument fails on unboxing. */ @Override public boolean apply(Character character) { return matches(character); }
補完:
1. 提供的默認實現CharMatcher功能及介紹
2. 操作方法簽名及功能列表
3. 使用代碼示例