判斷重複字符存在：更有意義一點

時間 2019-11-08

標籤判斷重複字符存在有意義一點 1點简体版

原文：原文鏈接

   /// <summary>
   /// Small collection of string routines: several ways to decide whether a string
   /// contains a repeated character, plus helpers that find the most frequently
   /// used character(s). The table-based methods only accept characters whose
   /// code is in the range 0..255.
   /// </summary>
   class Program
   {
       // Number of distinct character codes (0..255) accepted by the table-based methods.
       private const int CharsetSize = 256;

       // Number of bits in one Int32 word of the bitmap used by Implemenation2.
       private const int BitsPerWord = 32;

       static void Main(string[] args)
       {
           Program p = new Program();

           // The result is computed but never printed; inspect it in a debugger.
           List<char> result = p.FindMostFrequentlyUsedChars("hjkluuiuikjk");

           // Keep the console window open until a key is pressed.
           Console.ReadKey();
       }

       /// <summary>
       /// Rejects any character whose code lies outside 0..255, which is the
       /// precondition shared by all table-based methods in this class.
       /// </summary>
       /// <param name="c">Character to validate.</param>
       /// <exception cref="ArgumentException">The character code is greater than 255.</exception>
       private static void EnsureInCharset(char c)
       {
           if (c >= CharsetSize)
           {
               throw new ArgumentException("string contains UNICODE char");
           }
       }

       /// <summary>
       /// Determines whether every character in the string occurs at most once,
       /// using one boolean flag per possible character code.
       /// Precondition: every character code is in 0..255.
       /// Time is O(n) with n = s.Length; extra space is a fixed 256-entry array.
       /// See IsStringWithNoDuplicatedChar_Implemenation2 for a more compact table.
       /// </summary>
       /// <param name="s">String to inspect.</param>
       /// <returns>true if no character repeats; false at the first repeat found.</returns>
       /// <exception cref="ArgumentNullException">s is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 is reached before any repeat.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation1(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           bool[] seen = new bool[CharsetSize];
           foreach (char c in s)
           {
               EnsureInCharset(c);

               if (seen[c])
               {
                   return false;
               }
               seen[c] = true;
           }
           return true;
       }

       /// <summary>
       /// Bitmap variant of IsStringWithNoDuplicatedChar_Implemenation1: one bit per
       /// character code packed into eight 32-bit words, so the table is smaller.
       /// Precondition: every character code is in 0..255.
       /// </summary>
       /// <param name="s">String to inspect.</param>
       /// <returns>true if no character repeats; false at the first repeat found.</returns>
       /// <exception cref="ArgumentNullException">s is null.</exception>
       /// <exception cref="ArgumentException">A character code above 255 is reached before any repeat.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation2(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           // In C# `int` is always System.Int32 regardless of platform, so each
           // word holds exactly 32 flags and 256 / 32 = 8 words cover the range.
           int[] flags = new int[CharsetSize / BitsPerWord];
           foreach (char c in s)
           {
               // The range check is still required; without it the index below
               // would run past the end of the bitmap.
               EnsureInCharset(c);

               int index = c / BitsPerWord;
               int mask = 1 << (c % BitsPerWord);

               // Compare against zero rather than "> 0": for bit 31 the mask is
               // int.MinValue (negative), so a "> 0" test would miss repeats of
               // characters such as '_' (95) whose code % 32 == 31.
               if ((flags[index] & mask) != 0)
               {
                   return false;
               }
               flags[index] |= mask;
           }
           return true;
       }

       /// <summary>
       /// Further restricted variant: assumes every character is a lowercase letter
       /// 'a'..'z', so the 26 flags fit into a single 32-bit integer.
       /// </summary>
       /// <param name="s">String to inspect.</param>
       /// <returns>true if no letter repeats; false at the first repeat found.</returns>
       /// <exception cref="ArgumentNullException">s is null.</exception>
       /// <exception cref="ArgumentException">A character outside 'a'..'z' is reached before any repeat.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation3(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           int flag = 0;
           foreach (char c in s)
           {
               // Reject anything outside 'a'..'z'. Without this guard the shift
               // count below could be negative or >= 32; C# masks shift counts to
               // 5 bits, which would silently alias onto another letter's bit.
               if (c < 'a' || c > 'z')
               {
                   throw new ArgumentException("string contains char out of a-z");
               }

               int mask = 1 << (c - 'a');
               if ((flag & mask) != 0)
               {
                   return false;
               }
               flag |= mask;
           }
           return true;
       }

       /// <summary>
       /// Adds a length shortcut in front of IsStringWithNoDuplicatedChar_Implemenation2.
       /// With only 256 permitted character codes, a string longer than 256
       /// characters either repeats a character or contains an out-of-range one,
       /// so false is returned without scanning it.
       /// Note: such over-long strings are not validated, so out-of-range characters
       /// in them do not raise an exception.
       /// </summary>
       /// <param name="s">String to inspect.</param>
       /// <returns>false if s is longer than 256 characters; otherwise the result of Implemenation2.</returns>
       /// <exception cref="ArgumentNullException">s is null.</exception>
       /// <exception cref="ArgumentException">s has at most 256 characters and Implemenation2 rejects one of them.</exception>
       public bool IsStringWithNoDuplicatedChar_Implemenation4(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           if (s.Length > CharsetSize)
           {
               return false;
           }

           return IsStringWithNoDuplicatedChar_Implemenation2(s);
       }

       /// <summary>
       /// Finds the character that occurs most often in the string.
       /// Precondition: every character code is in 0..255.
       /// When several characters share the highest count, the one that reached
       /// that count first while scanning left to right is returned.
       /// </summary>
       /// <param name="s">String to inspect.</param>
       /// <returns>The most frequent character, or null when s is empty.</returns>
       /// <exception cref="ArgumentNullException">s is null.</exception>
       /// <exception cref="ArgumentException">s contains a character code above 255.</exception>
       public char? FindMostFrequentlyUsedChar(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           int[] counters = new int[CharsetSize];
           int highestCount = 0;
           char? winner = null;
           foreach (char c in s)
           {
               EnsureInCharset(c);

               counters[c]++;

               // Strictly greater: a later character that merely ties the
               // current maximum does not displace the current winner.
               if (counters[c] > highestCount)
               {
                   highestCount = counters[c];
                   winner = c;
               }
           }
           return winner;
       }

       /// <summary>
       /// Like FindMostFrequentlyUsedChar, but returns every character that shares
       /// the highest count instead of just one. Still a single O(n) pass.
       /// Precondition: every character code is in 0..255.
       /// </summary>
       /// <param name="s">String to inspect.</param>
       /// <returns>
       /// The characters tied for the highest count, in the order in which each one
       /// reached that count; an empty list when s is empty.
       /// </returns>
       /// <exception cref="ArgumentNullException">s is null.</exception>
       /// <exception cref="ArgumentException">s contains a character code above 255.</exception>
       public List<char> FindMostFrequentlyUsedChars(string s)
       {
           if (s == null)
           {
               throw new ArgumentNullException("s");
           }

           List<char> leaders = new List<char>();
           int[] counters = new int[CharsetSize];
           int highestCount = 0;
           foreach (char c in s)
           {
               EnsureInCharset(c);

               counters[c]++;

               if (counters[c] > highestCount)
               {
                   // New maximum: all previous leaders are now behind.
                   leaders.Clear();
                   highestCount = counters[c];
                   leaders.Add(c);
               }
               else if (counters[c] == highestCount)
               {
                   // This character has just caught up with the current maximum.
                   leaders.Add(c);
               }
           }
           return leaders;
       }
   }

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。