C#裏面濫用String造成的性能問題

前兩天給我們的json寫一個解析函數, 之前用的是正宗的json parser, 支持完整的json特性. 可是實際上我們用到的特性, 只有key-value的映射, value的類型只有數字和字符串兩種. 因爲parse的速度比較慢, 所以我打算自己用字符串解析一遍. 第一個能工作的原型出來的時候, 速度和json解析差不多. 做了profile之後發現, 絕大部分時間都浪費在構造String和檢索IndexOf上面.

下了coreclr的源碼研究了一下, 發現String.Split在實現的時候, 先掃描一遍split, 計算有多少個元素, 然後分配一個Array, 然後再去做Split操作. Split操作裏面還會再new一個新的String出來, 順便做一下拷貝. 看到這裏我就驚呆了, 本來String在C#和Java這兩個託管語言裏面都是不可變的, 那麼爲什麼他們不用一個Slice去構造一個SubString呢?

網上搜了一下, 也沒發現有人寫的StringSlice或者類似的東西, 我就順手擼了一個StringView, 一個只讀的StringSlice.

using System;
using System.Collections.Generic;

  3 public unsafe struct StringView
  4 {
  5     public static readonly StringView Empty = new StringView("");
  6 
  7     public StringView(string str) : this(str, 0, str.Length) { }
  8 
  9     public StringView(string str, int begin, int length)
 10     {
 11         this.str = str;
 12         this.begin = begin;
 13         this.length = length;
 14         if (str.Length <= 0) return;
 15 
 16         if (this.begin < 0 ||
 17             this.begin >= this.str.Length ||
 18             this.begin + this.length > this.str.Length)
 19         {
 20             throw new System.Exception("StringView's Constructor OutOfBound");
 21         }
 22     }
 23 
 24     public int IndexOf(char c, int start = 0)
 25     {
 26         fixed (char* p = this.str)
 27         {
 28             for (int i = start; i < length; ++i)
 29             {
 30                 if (p[this.begin + i] == c) return i;
 31             }
 32         }
 33 
 34         return -1;
 35     }
 36 
 37     private static bool ArrayContains(char[] array, char c)
 38     {
 39         int length = array.Length;
 40         fixed (char* p = array)
 41         {
 42             for (int i = 0; i < length; ++i)
 43                 if (p[i] == c) return true;
 44         }
 45 
 46         return false;
 47     }
 48 
 49     public int IndexOf(char[] array, int start = 0)
 50     {
 51         if (array.Length == 1) return this.IndexOf(array[0], start);
 52 
 53         fixed (char* p = this.str)
 54         {
 55             for (int i = start; i < length; ++i)
 56             {
 57                 if (ArrayContains(array, p[this.begin + i])) return i;
 58             }
 59         }
 60 
 61         return -1;
 62     }
 63 
 64     public int IndexOf(string s, int start = 0)
 65     {
 66         int s1_length = this.str.Length;
 67         int s2_length = s.Length;
 68         fixed (char* p1 = this.str)
 69         {
 70             fixed (char* p2 = s)
 71             {
 72                 int index = this.IndexOf(p2[0], start);
 73                 while (index >= 0)
 74                 {
 75                     if (s2_length > s1_length - this.begin - index)
 76                         return -1;
 77                     bool match = true;
 78                     for (int i = 0; i < s2_length; ++i)
 79                     {
 80                         if (p1[this.begin + index + i] != p2[i]) { match = false; break; }
 81                     }
 82                     if (match) return index;
 83 
 84                     index = this.IndexOf(p2[0], index + 1);
 85                 }
 86                 return -1;
 87             }
 88         }
 89     }
 90 
 91     public unsafe char this[int index]
 92     {
 93         get
 94         {
 95             if (index < 0 || index >= this.length)
 96             {
 97                 throw new System.Exception("StringView's Index OutOfBound");
 98             }
 99 
100             fixed (char* p = this.str)
101             {
102                 return p[this.begin + index];
103             }
104         }
105     }
106 
107     public StringView SubString(int begin)
108     {
109         return this.SubString(begin, this.length - begin);
110     }
111 
112     public StringView SubString(int begin, int length)
113     {
114         return new StringView(this.str, this.begin + begin, length);
115     }
116 
117     public List<StringView> Split(char split, List<StringView> array)
118     {
119         array.Clear();
120 
121         int index = 0;
122         int pos1 = 0, pos2 = 0;
123         pos2 = this.IndexOf(split);
124         while (pos2 > 0 && pos2 < this.length)
125         {
126             array.Add(new StringView(str, this.begin + pos1, pos2 - pos1));
127             pos1 = pos2 + 1;
128             pos2 = this.IndexOf(split, pos1);
129             ++index;
130         }
131         if (pos1 != this.length) array.Add(new StringView(str, this.begin + pos1, this.length - pos1));
132 
133         return array;
134     }
135 
136     public override bool Equals(object obj)
137     {
138         if (obj is StringView)
139         {
140             StringView v = (StringView)obj;
141             return this.Equals(v);
142         }
143         return false;
144     }
145 
146     public bool Equals(StringView v)
147     {
148         if (v.Length != this.Length) return false;
149         for (int i = 0; i < this.Length; ++i)
150         {
151             if (this[i] != v[i]) return false;
152         }
153         return true;
154     }
155 
156     internal static int CombineHashCodes(int h1, int h2)
157     {
158         return (((h1 << 5) + h1) ^ h2);
159     }
160 
161     public override int GetHashCode()
162     {
163         int hash_code = 0;
164         for (int i = 0; i < this.length; ++i)
165         {
166             hash_code = CombineHashCodes(hash_code, this[i].GetHashCode());
167         }
168         return hash_code;
169     }
170 
171     public int Length { get { return this.length; } }
172 
173     public override string ToString()
174     {
175         return this.str.Substring(begin, length);
176     }
177 
178     public string GetRawString() { return this.str; }
179     public int GetBegin() { return this.begin; }
180 
181     private string str;
182     private int begin;
183     private int length;
184 }

爲了方便替換String, 很多接口都保持了一致. 目前這個版本只是滿足我自己的需求, 以後可以考慮繼續完善, 把String的更多函數添加進來.

之前說的IndexOf也比較耗, 因爲String索引器會帶有邊界檢測, 而IndexOf一直在用索引器, 所以個人感覺是不太合適的, 所以我的StringView一直在用指針….

PS: 修改之後的純text parse, 速度比json parse快一倍以上, 性能還不錯, 實際上還有提升的空間.

PS: 現在比較完整的StringView已經上傳至github: https://github.com/egmkang/StringView , 添加了ToInt64和StringBuilder.Append支持.

相關文章
相關標籤/搜索