判斷字符串中是否存在重複字符:更有意義一點的做法

   /// <summary>
   /// Small collection of character-statistics routines over strings whose
   /// characters all have code points in the range 0-255: several variants of
   /// "does this string contain a repeated character?" plus "which character(s)
   /// occur most often?".
   /// </summary>
   class Program
   {
       /// <summary>
       /// Number of distinct character codes (0-255) accepted by the routines
       /// that use a fixed-size lookup table.
       /// </summary>
       private const int CharsetSize = 256;

       /// <summary>Number of flag bits stored in one <see cref="int"/>.</summary>
       private const int BitsPerInt = 32;

       /// <summary>
       /// Demo entry point: computes the most frequently used characters of a
       /// sample string and then waits for a key press. The computed list is
       /// not used any further.
       /// </summary>
       static void Main(string[] args)
       {
           Program p = new Program();
           List<char> result = p.FindMostFrequentlyUsedChars("hjkluuiuikjk");

           Console.ReadKey();
       }

       /// <summary>
       /// Rejects a missing input string.
       /// </summary>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       private static void EnsureNotNull(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }
       }

       /// <summary>
       /// Rejects any character whose code point does not fit the 256-entry tables.
       /// </summary>
       /// <exception cref="ArgumentException"><paramref name="c"/> is greater than 255.</exception>
       private static void EnsureInCharset(char c)
       {
           if (c >= CharsetSize)
           {
               throw new ArgumentException("string contains UNICODE char");
           }
       }

       /// <summary>
       /// Determines whether every character of <paramref name="s"/> occurs at most once,
       /// using one boolean flag per possible character code.
       /// Precondition: all characters have code points 0-255.
       /// Time is O(n) with n = s.Length; extra space is a constant 256 flags.
       /// See <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/> for a more compact table.
       /// </summary>
       /// <param name="s">The string to inspect.</param>
       /// <returns>true when no character is repeated (including the empty string); otherwise false.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character above 255 is met before a duplicate is found.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation1(string s)
       {
           EnsureNotNull(s);

           bool[] seen = new bool[CharsetSize];
           foreach (char c in s)
           {
               EnsureInCharset(c);

               if (seen[c])
               {
                   return false;
               }
               seen[c] = true;
           }
           return true;
       }

       /// <summary>
       /// Bitmap variant of <see cref="IsStringWithNoDuplicatedChar_Implemenation1"/>:
       /// the 256 flags are packed into eight 32-bit integers instead of 256 booleans.
       /// </summary>
       /// <param name="s">The string to inspect.</param>
       /// <returns>true when no character is repeated (including the empty string); otherwise false.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character above 255 is met before a duplicate is found.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation2(string s)
       {
           EnsureNotNull(s);

           // In C# "int" is always the 32-bit System.Int32, on 32-bit and 64-bit
           // platforms alike, so eight of them hold exactly 256 flag bits.
           int[] flags = new int[CharsetSize / BitsPerInt];
           foreach (char c in s)
           {
               // The precondition is unchanged, so the range check is still required;
               // without it the index below could run past the table.
               EnsureInCharset(c);

               int index = c / BitsPerInt;
               int mask = 1 << (c % BitsPerInt);

               // Compare with "!= 0", not "> 0": for bit 31 the mask is int.MinValue,
               // so a set flag yields a negative value that "> 0" would miss.
               if ((flags[index] & mask) != 0)
               {
                   return false;
               }
               flags[index] |= mask;
           }
           return true;
       }

       /// <summary>
       /// Narrower precondition: every character is a lowercase letter a-z, so the
       /// 26 flags fit into a single 32-bit integer.
       /// </summary>
       /// <param name="s">The string to inspect.</param>
       /// <returns>true when no letter is repeated (including the empty string); otherwise false.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">A character outside a-z is met before a duplicate is found.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation3(string s)
       {
           EnsureNotNull(s);

           int flag = 0;
           foreach (char c in s)
           {
               // Reject anything outside a-z. Shift counts are reduced modulo 32,
               // so an out-of-range character would otherwise alias onto the bit
               // of some unrelated letter and corrupt the result silently.
               if (c < 'a' || c > 'z')
               {
                   throw new ArgumentException("string contains char out of a-z");
               }

               int mask = 1 << (c - 'a');
               if ((flag & mask) != 0)
               {
                   return false;
               }
               flag |= mask;
           }
           return true;
       }

       /// <summary>
       /// Pigeonhole shortcut on top of <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>.
       /// All variants above depend on a small character set; a larger set would need
       /// a larger table. Under the 0-255 precondition a string longer than 256
       /// characters either repeats a character or is invalid input, so it is
       /// reported as "has duplicates" without being scanned (and therefore without
       /// being validated).
       /// </summary>
       /// <param name="s">The string to inspect.</param>
       /// <returns>false when the string is longer than 256 characters or repeats a character; otherwise true.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">
       /// The string has at most 256 characters and a character above 255 is met before a duplicate is found.
       /// </exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation4(string s)
       {
           EnsureNotNull(s);

           if (s.Length > CharsetSize)
           {
               return false;
           }

           return IsStringWithNoDuplicatedChar_Implemenation2(s);
       }

       /// <summary>
       /// Finds the character that occurs most often in <paramref name="s"/>.
       /// With at most 256 distinct characters the duplicate checks above are
       /// trivial for any realistic input, so the most frequent character is
       /// arguably the more useful question.
       /// When several characters share the highest count, the one that reached
       /// that count first while scanning left to right is returned.
       /// </summary>
       /// <param name="s">The string to inspect; all characters must have code points 0-255.</param>
       /// <returns>The most frequent character, or null when the string is empty.</returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">The string contains a character above 255.</exception>
       public char? FindMostFrequentlyUsedChar(string s)
       {
           EnsureNotNull(s);

           int[] counters = new int[CharsetSize];
           int most_used_times = 0;
           char? most_used_char = null;
           foreach (char c in s)
           {
               EnsureInCharset(c);

               counters[c]++;

               // Strictly greater: an equal count never displaces the current leader.
               if (counters[c] > most_used_times)
               {
                   most_used_times = counters[c];
                   most_used_char = c;
               }
           }
           return most_used_char;
       }

       /// <summary>
       /// Like <see cref="FindMostFrequentlyUsedChar"/>, but returns every character
       /// tied for the highest count instead of only one. Still a single O(n) pass.
       /// </summary>
       /// <param name="s">The string to inspect; all characters must have code points 0-255.</param>
       /// <returns>
       /// The characters sharing the highest occurrence count, in the order in which
       /// they reached that count; an empty list when the string is empty.
       /// </returns>
       /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
       /// <exception cref="ArgumentException">The string contains a character above 255.</exception>
       public List<char> FindMostFrequentlyUsedChars(string s)
       {
           EnsureNotNull(s);

           List<char> most_used_chars = new List<char>();
           int[] counters = new int[CharsetSize];
           int most_used_times = 0;
           foreach (char c in s)
           {
               EnsureInCharset(c);

               counters[c]++;

               if (counters[c] > most_used_times)
               {
                   // A new maximum invalidates every previous leader.
                   most_used_chars.Clear();
                   most_used_times = counters[c];
                   most_used_chars.Add(c);
               }
               else if (counters[c] == most_used_times)
               {
                   // c has just caught up with the current maximum. Counts only grow,
                   // so it reaches this value once and cannot be added twice.
                   most_used_chars.Add(c);
               }
           }
           return most_used_chars;
       }
   }
相關文章
相關標籤/搜索