class Program
{
    /// <summary>
    /// Number of distinct character codes the table-based algorithms accept (codes 0..255).
    /// </summary>
    private const int CharsetSize = 256;

    /// <summary>
    /// Number of bits in one <see cref="int"/> word of the bitmap used by the bit-mapping variant.
    /// </summary>
    private const int BitsPerWord = 32;

    /// <summary>
    /// Demo entry point: computes the most frequent characters of a sample string,
    /// then waits for a key press before exiting.
    /// </summary>
    static void Main(string[] args)
    {
        Program p = new Program();
        List<char> result = p.FindMostFrequentlyUsedChars("hjkluuiuikjk");

        Console.ReadKey();
    }

    /// <summary>
    /// Rejects a null input string up front so callers get a descriptive exception
    /// instead of a NullReferenceException from inside the loop.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null.</exception>
    private static void EnsureNotNull(string s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }
    }

    /// <summary>
    /// Rejects any character whose code does not fit the 256-entry tables (code &gt; 255).
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when the character code is above 255.</exception>
    private static void EnsureInCharset(char c)
    {
        if (c >= CharsetSize)
        {
            throw new ArgumentException("string contains UNICODE char");
        }
    }

    /// <summary>
    /// Determines whether every character in the string occurs at most once.
    /// Precondition: every character code is in the range 0..255.
    /// Makes a single pass over the string (O(n), n = s.Length) and uses a fixed
    /// table of 256 booleans. For a variant that needs less space, see
    /// <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>.
    /// </summary>
    /// <param name="s">The string to inspect.</param>
    /// <returns><c>true</c> when no character repeats; <c>false</c> otherwise.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when a character code above 255 is encountered.</exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation1(string s)
    {
        EnsureNotNull(s);

        bool[] seen = new bool[CharsetSize];
        foreach (char c in s)
        {
            EnsureInCharset(c);

            if (seen[c])
            {
                return false;
            }
            seen[c] = true;
        }
        return true;
    }

    /// <summary>
    /// Bit-mapping variant of <see cref="IsStringWithNoDuplicatedChar_Implemenation1"/>:
    /// one bit per character code, packed into eight 32-bit words, so it uses less
    /// space than the boolean table.
    /// </summary>
    /// <param name="s">The string to inspect.</param>
    /// <returns><c>true</c> when no character repeats; <c>false</c> otherwise.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when a character code above 255 is encountered.</exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation2(string s)
    {
        EnsureNotNull(s);

        // In C# 'int' is always the 32-bit System.Int32, regardless of the machine's
        // word size, so 256 / 32 = 8 words cover every accepted character code.
        int[] bitmap = new int[CharsetSize / BitsPerWord];
        foreach (char c in s)
        {
            // The range check is still required: the bitmap only has room for codes 0..255.
            EnsureInCharset(c);

            int wordIndex = c / BitsPerWord;
            int mask = 1 << (c % BitsPerWord);

            // Compare against zero rather than using '> 0': for bit 31 the mask is
            // int.MinValue, so the AND result is negative when that bit is set.
            if ((bitmap[wordIndex] & mask) != 0)
            {
                return false;
            }
            bitmap[wordIndex] |= mask;
        }
        return true;
    }

    /// <summary>
    /// Narrower variant that assumes every character is a lowercase letter 'a'..'z',
    /// so the 26 letters can be tracked in the bits of a single 32-bit integer.
    /// </summary>
    /// <param name="s">The string to inspect.</param>
    /// <returns><c>true</c> when no letter repeats; <c>false</c> otherwise.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when a character outside 'a'..'z' is encountered.</exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation3(string s)
    {
        EnsureNotNull(s);

        int seenLetters = 0;
        foreach (char c in s)
        {
            // Reject anything outside 'a'..'z'. Without this check the shift below would
            // receive a negative or too-large count, which C# silently masks to 5 bits,
            // making unrelated characters collide on the same bit.
            if (c < 'a' || c > 'z')
            {
                throw new ArgumentException("string contains char out of a-z");
            }

            int mask = 1 << (c - 'a');
            if ((seenLetters & mask) != 0)
            {
                return false;
            }
            seenLetters |= mask;
        }
        return true;
    }

    /// <summary>
    /// Short-circuiting variant. Under the precondition that only character codes
    /// 0..255 are valid, a string longer than 256 characters must either contain a
    /// repeated character or contain invalid input, so <c>false</c> is returned
    /// immediately without scanning (and without validating the characters).
    /// Shorter strings are delegated to
    /// <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>.
    /// </summary>
    /// <param name="s">The string to inspect.</param>
    /// <returns><c>true</c> when no character repeats; <c>false</c> otherwise.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Thrown when the string has at most 256 characters and one of them has a code above 255.
    /// </exception>
    public bool IsStringWithNoDuplicatedChar_Implemenation4(string s)
    {
        EnsureNotNull(s);

        if (s.Length > CharsetSize)
        {
            return false;
        }

        return IsStringWithNoDuplicatedChar_Implemenation2(s);
    }

    /// <summary>
    /// Finds the character that occurs most often in the string, using one counter
    /// per character code (0..255) and a single pass over the input.
    /// When several characters share the highest count, the one that reached that
    /// count first during the scan is returned; use
    /// <see cref="FindMostFrequentlyUsedChars"/> to obtain all of them.
    /// </summary>
    /// <param name="s">The string to inspect.</param>
    /// <returns>The most frequent character, or <c>null</c> when the string is empty.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when a character code above 255 is encountered.</exception>
    public char? FindMostFrequentlyUsedChar(string s)
    {
        EnsureNotNull(s);

        int[] counts = new int[CharsetSize];
        int bestCount = 0;
        char? bestChar = null;
        foreach (char c in s)
        {
            EnsureInCharset(c);

            int current = ++counts[c];

            // Strictly greater: a later character that merely ties does not replace the leader.
            if (current > bestCount)
            {
                bestCount = current;
                bestChar = c;
            }
        }
        return bestChar;
    }

    /// <summary>
    /// Like <see cref="FindMostFrequentlyUsedChar"/>, but returns every character
    /// tied for the highest occurrence count. Still a single pass over the input.
    /// </summary>
    /// <param name="s">The string to inspect.</param>
    /// <returns>
    /// The characters sharing the highest count, in the order in which each reached
    /// that count; an empty list when the string is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when a character code above 255 is encountered.</exception>
    public List<char> FindMostFrequentlyUsedChars(string s)
    {
        EnsureNotNull(s);

        List<char> leaders = new List<char>();
        int[] counts = new int[CharsetSize];
        int bestCount = 0;
        foreach (char c in s)
        {
            EnsureInCharset(c);

            int current = ++counts[c];

            if (current > bestCount)
            {
                // A new maximum invalidates all previous leaders.
                leaders.Clear();
                bestCount = current;
                leaders.Add(c);
            }
            else if (current == bestCount)
            {
                // This character has just caught up with the current maximum. It cannot
                // already be in the list: a listed character sits at bestCount, so its
                // next increment takes the branch above instead.
                leaders.Add(c);
            }
        }
        return leaders;
    }
}