從.Net版本演變看String和StringBuilder性能之爭

在C#中string關鍵字的映射實際上指向.NET基類System.String。System.String是一個功能非常強大且用途非常廣泛的基類,因此咱們在用C#的string的時候實際就是在用.NET Framework的String。String是一個不可變的數據類型,一旦對字符串對象進行了初始化,該字符串對象就不能改變了。表面上修改字符串內容的方法和運算符實際上是建立一個新字符串,因此重複修改給定的字符串,效率會很低。因此.Net Framework定義了另外一個StringBuilder類以提升字符串處理的性能,但String和StringBuilder之間又有什麼聯繫呢?

 如下示例基於.Net Framework 2.0,主要參考了《重談字符串性能》一文。先定義一個簡單的性能計數器,主要目的有:

(1)打印出各字符串處理方法的消耗時間

(2)CPU時鐘週期

(3)執行過程中垃圾回收器的回收次數

 /// <summary>
 /// Minimal console benchmark helper. <see cref="Time"/> runs an action a given
 /// number of times and prints the elapsed wall-clock time, the CPU cycles
 /// charged to the calling thread, and the number of garbage collections that
 /// happened in each generation while the action was running.
 /// </summary>
 public class CodeTimer
    {
        /// <summary>Parameterless callback that <see cref="Time"/> executes repeatedly.</summary>
        public delegate void Action();

        /// <summary>
        /// Raises the priority of the current process and thread, then calls
        /// <see cref="Time"/> once with an empty name.
        /// </summary>
        /// <remarks>
        /// <see cref="Time"/> returns immediately when the name is empty, so this
        /// call does not run the measurement path; presumably it only serves to get
        /// <see cref="Time"/> JIT-compiled before the first real measurement.
        /// </remarks>
        public static void Initialize()
        {
            Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.High;
            Thread.CurrentThread.Priority = ThreadPriority.Highest;
            Time("", 1, () => { });
        }

        /// <summary>
        /// Runs <paramref name="action"/> <paramref name="iteration"/> times and
        /// writes the measurements to the console.
        /// </summary>
        /// <param name="name">Title printed above the results; a null or empty name makes the method return without doing anything.</param>
        /// <param name="iteration">Number of times <paramref name="action"/> is invoked.</param>
        /// <param name="action">The code to measure.</param>
        public static void Time(string name, int iteration, Action action)
        {
            if (String.IsNullOrEmpty(name)) return;

            ConsoleColor previousColor = Console.ForegroundColor;
            int[] gcCounts;
            Stopwatch watch;
            ulong cpuCycles;

            // The title and the measured run use yellow. The finally block restores
            // the caller's colour even when the action throws; previously an
            // exception left the console yellow.
            Console.ForegroundColor = ConsoleColor.Yellow;
            try
            {
                // 1. Print the title.
                Console.WriteLine(name);

                // 2. Force a full collection, then snapshot the per-generation
                //    collection counters so only collections caused by the run are reported.
                GC.Collect(GC.MaxGeneration, GCCollectionMode.Forced);
                gcCounts = new int[GC.MaxGeneration + 1];
                for (int i = 0; i <= GC.MaxGeneration; i++)
                {
                    gcCounts[i] = GC.CollectionCount(i);
                }

                // 3. Run the action, measuring wall-clock time and thread CPU cycles.
                watch = new Stopwatch();
                watch.Start();
                ulong cycleCount = GetCycleCount();
                for (int i = 0; i < iteration; i++) action();
                cpuCycles = GetCycleCount() - cycleCount;
                watch.Stop();
            }
            finally
            {
                Console.ForegroundColor = previousColor;
            }

            // 4. Print time and cycle results in the caller's original colour.
            Console.WriteLine("\tTime Elapsed:\t" + watch.ElapsedMilliseconds.ToString("N0") + "ms");
            Console.WriteLine("\tCPU Cycles:\t" + cpuCycles.ToString("N0"));

            // 5. Print how many collections each generation went through during the run.
            for (int i = 0; i <= GC.MaxGeneration; i++)
            {
                int count = GC.CollectionCount(i) - gcCounts[i];
                Console.WriteLine("\tGen " + i + ": \t\t" + count);
            }

            Console.WriteLine();
        }

        /// <summary>
        /// Reads the cycle counter of the current thread through the Win32
        /// QueryThreadCycleTime function.
        /// </summary>
        /// <returns>
        /// The value written by QueryThreadCycleTime. The native call's success flag
        /// is not checked, so a failed call presumably yields the initial value 0.
        /// </returns>
        private static ulong GetCycleCount()
        {
            ulong cycleCount = 0;
            QueryThreadCycleTime(GetCurrentThread(), ref cycleCount);
            return cycleCount;
        }

        // kernel32 import: stores the cycle time of the given thread in cycleTime.
        [DllImport("kernel32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        static extern bool QueryThreadCycleTime(IntPtr threadHandle, ref ulong cycleTime);

        // kernel32 import: returns a handle for the calling thread.
        [DllImport("kernel32.dll")]
        static extern IntPtr GetCurrentThread();
    }

原文鏈接:一個簡單的性能計數器:CodeTimer

定義一個StringListBuilder,用List<string>先將全部字符串保存起來,最後轉化爲字符串數組,再拼接成字符串返回

/// <summary>
/// Collects appended strings in a list and only joins them into a single
/// string when <see cref="ToString"/> is called.
/// </summary>
public class StringListBuilder
    {
        // Appended pieces, kept in the order they were added.
        private List<string> m_pieces = new List<string>();

        /// <summary>
        /// Stores <paramref name="s"/> and returns this instance so calls can be chained.
        /// </summary>
        public StringListBuilder Append(string s)
        {
            m_pieces.Add(s);
            return this;
        }

        /// <summary>
        /// Copies the stored pieces into an array and concatenates them with String.Concat.
        /// </summary>
        public override string ToString()
        {
            string[] pieces = m_pieces.ToArray();
            return String.Concat(pieces);
        }
    }

定義一個StrPerformance類,用於維護各個字符串處理的方法

 /// <summary>
 /// The string-building strategies compared by the benchmark. Every method
 /// produces the ten-character piece "0123456789" repeated <c>count</c> times.
 /// </summary>
 public class StrPerformance
    {
        // The piece that each strategy appends `count` times.
        private static readonly string Piece = "0123456789";

        /// <summary>Builds the result with repeated string concatenation.</summary>
        public static string NormalConcat(int count)
        {
            string text = "";
            int remaining = count;
            while (remaining > 0)
            {
                text = text + Piece;
                remaining--;
            }
            return text;
        }

        /// <summary>Builds the result with System.Text.StringBuilder.</summary>
        public static string StringBuilder(int count)
        {
            var buffer = new StringBuilder();
            int appended = 0;
            while (appended < count)
            {
                buffer.Append(Piece);
                appended++;
            }
            return buffer.ToString();
        }

        /// <summary>Builds the result with the list-backed StringListBuilder.</summary>
        public static string StringListBuilder(int count)
        {
            var collector = new StringListBuilder();
            int appended = 0;
            while (appended < count)
            {
                collector.Append(Piece);
                appended++;
            }
            return collector.ToString();
        }

        /// <summary>Fills a string array of size <paramref name="count"/> and joins it with String.Concat.</summary>
        public static string StringConcat(int count)
        {
            string[] pieces = new string[count];
            for (int slot = 0; slot < pieces.Length; slot++)
            {
                pieces[slot] = Piece;
            }
            return String.Concat(pieces);
        }
    }
View Code

用性能計數器記錄各個方法的執行過程,並且打印出對應的參數

CodeTimer.Initialize();

            // How many times each scenario is executed per measurement.
            const int runsPerMeasurement = 10000;

            // The number of appended pieces doubles every round: 2, 4, 8, ..., 2048.
            for (int pieces = 2; pieces <= 2048; pieces *= 2)
            {
                string listBuilderTitle = String.Format("StringListBuilder ({0})", pieces);
                CodeTimer.Time(listBuilderTitle, runsPerMeasurement, () => { StrPerformance.StringListBuilder(pieces); });

                string concatTitle = String.Format("String concat ({0})", pieces);
                CodeTimer.Time(concatTitle, runsPerMeasurement, () => { StrPerformance.StringConcat(pieces); });

                string builderTitle = String.Format("StringBuilder ({0})", pieces);
                CodeTimer.Time(builderTitle, runsPerMeasurement, () => { StrPerformance.StringBuilder(pieces); });
            }

分析能夠得出,廣受追捧的StringBuilder性能似乎並不是最好的,String.Concat方法有時候更適合使用。那麼爲什麼String.Concat方法性能那麼高,StringBuilder反而比StringListBuilder要差(要知道StringListBuilder還要維護一個集合)?經過反編譯,咱們看一下.NET 2.0的String.Concat和StringBuilder究竟是怎麼實現的。

先看在.Net 2.0下StringBuilder的Append和ToString方法的實現過程。

// System.Text.StringBuilder (decompiled listing for .NET Framework 2.0)
//
// Appends `value` to the builder's backing string and returns this builder.
// A null `value` is ignored. The text is written with AppendInPlace, either
// into the current backing string or into one obtained from GetNewString, and
// ReplaceString is then called with the calling thread's identifier and the
// string that was written to.
public StringBuilder Append(string value)
{
    // Nothing to append.
    if (value == null)
    {
        return this;
    }
    string text = this.m_StringValue;
    IntPtr intPtr = Thread.InternalGetCurrentThread();
    // The recorded thread marker differs from the calling thread: continue with
    // the string returned by GetStringForStringBuilder rather than the stored one
    // (presumably a private copy of the same capacity; the helper is not shown here).
    if (this.m_currentThread != intPtr)
    {
        text = string.GetStringForStringBuilder(text, text.Capacity);
    }
    // `length` is where the new characters start; `requiredLength` is the
    // length of the content once `value` has been appended.
    int length = text.Length;
    int requiredLength = length + value.Length;
    if (this.NeedsAllocation(text, requiredLength))
    {
        // Presumably taken when `text` cannot hold requiredLength characters:
        // get a new string from GetNewString and append into that one.
        string newString = this.GetNewString(text, requiredLength);
        newString.AppendInPlace(value, length);
        this.ReplaceString(intPtr, newString);
    }
    else
    {
        // Append directly into the existing string object.
        text.AppendInPlace(value, length);
        this.ReplaceString(intPtr, text);
    }
    return this;
}
View Code
// Returns the builder's content as a string (listing for .NET Framework 2.0).
// Depending on the thread marker and on how full the backing string is, the
// result is either what string.InternalCopy returns or the backing string
// object itself.
// NOTE(review): this listing reads the field as m_currentValue while the
// Append listing uses m_StringValue; they presumably denote the same field.
public override string ToString()
{
    string currentValue = this.m_currentValue;

    // The recorded thread marker does not match the calling thread:
    // return the result of InternalCopy instead of the backing string.
    if (this.m_currentThread != Thread.InternalGetCurrentThread())
    {
        return string.InternalCopy(currentValue);
    }

    // If this string object is "too empty" (Length is less than half of ArrayLength)
    if ((2 * currentValue.Length) < currentValue.ArrayLength)
    {
        // then build a "full" object and return that instead
        return string.InternalCopy(currentValue);
    }

    // Put a \0 after the end of the character sequence
    currentValue.ClearPostNullChar();

    // The container is now "exposed" to the caller, so set the "current thread"
    // marker to Zero; this means the next operation will produce a new string
    // object (that is, a new container)
    this.m_currentThread = IntPtr.Zero;

    // "Not too empty": return the current object itself
    return currentValue;
}
View Code

StringBuilder的ToString方法比較有意思,它會判斷究竟是「構造一個新對象」仍是就「直接返回當前容器」給你。若是直接返回當前容器,則可能會浪費較多內存,而若是構造一個新對象,則又會損耗性能。讓StringBuilder作出決定的即是容器內部的字符序列佔「最大容積」的比例,若是超過一半,則代表「還不算太空」,便選擇「時間」,直接返回容器;不然,StringBuilder會認爲仍是選擇「空間」較爲合算,便構造一個新對象並返回,至於當前的容器便會和StringBuilder一道被GC回收了。

同時咱們能夠看到,若是返回了新對象,則當前容器還能夠繼續在Append時使用,不然Append方法便會由於m_currentThread爲Zero而建立新的容器。不過,從ToString的實現中也能夠看出,屢次調用ToString方法必定返回新建的對象。

而String.Concat又作了什麼,String類Concat的具體實現過程

/// <summary>
/// Concatenates all elements of <paramref name="values"/>; null elements are
/// treated as empty strings.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
/// <exception cref="OutOfMemoryException">The accumulated length overflows an int.</exception>
public static string Concat(params string[] values)
{
    if (values == null)
    {
        throw new ArgumentNullException("values");
    }

    // Private snapshot of the inputs with nulls replaced, so the pieces that
    // are measured here are exactly the pieces that get copied afterwards.
    string[] pieces = new string[values.Length];
    int combinedLength = 0;

    int index = 0;
    while (index < values.Length)
    {
        string piece = values[index];
        if (piece == null)
        {
            piece = Empty;
        }
        pieces[index] = piece;

        // A negative running total means the sum wrapped around.
        combinedLength += piece.Length;
        if (combinedLength < 0)
        {
            throw new OutOfMemoryException();
        }

        index++;
    }

    // Allocate the result once and copy every piece into it.
    return ConcatArray(pieces, combinedLength);
}
View Code
// Declared extern with MethodImplOptions.InternalCall, so the body is supplied
// by the runtime. ConcatArray uses it to obtain the destination string for
// `length` characters, which it then fills in place.
[MethodImpl(MethodImplOptions.InternalCall)]
private static extern string FastAllocateString(int length);

/// <summary>
/// Allocates one destination string of <paramref name="totalLength"/> and
/// copies every element of <paramref name="values"/> into it, back to back.
/// </summary>
private static string ConcatArray(string[] values, int totalLength)
{
    // Allocate the space for the destination string (this creates the object).
    string result = FastAllocateString(totalLength);

    // Position in `result` where the next piece starts.
    int offset = 0;
    foreach (string piece in values)
    {
        FillStringChecked(result, offset, piece);
        offset += piece.Length;
    }

    return result;
}
View Code
// Copies the characters of `src` into `dest`, starting at index `destPos`.
// Throws IndexOutOfRangeException when `src` is longer than the space left in
// `dest` after `destPos`.
private static unsafe void FillStringChecked(string dest, int destPos, string src)
{
    int length = src.Length;
    // Refuse to write past the end of the destination string.
    if (length > (dest.Length - destPos))
    {
        throw new IndexOutOfRangeException();
    }

    // Pin both strings and take pointers to their first characters, then let
    // wstrcpy copy `length` chars to the destination offset.
    fixed (char* chDest = &dest.m_firstChar)
    {
        fixed (char* chSrc = &src.m_firstChar)
        {
            wstrcpy(chDest + destPos, chSrc, length);
        }
    }
}
View Code

因爲數組中的字符串都是肯定的所以事先計算出結果的長度,因而遍歷源字符串數組,將它們一個一個複製(或叫作「填充」)到目標字符串的某一段位置上去,由於在此以前已經肯定結果的大小,所以直接建立一個「容器」便可,剩下的只是填充數據而已。既然能夠不浪費任何一寸空間,也沒有任何多餘的操做,這也是String.Concat高效的緣由。

 

一樣的代碼移植到.Net 4.5上,String.Concat在處理字符串連接時會不會還像以前一樣性能最高?

此次StringBuilder又從新回到了咱們最初的印象中,在處理多字符串鏈接的時候StringBuilder是性能最高的,經過和.Net 2.0的實驗結果來看StringListBuilder和String Concat的性能變化不大,而彷佛StringBuilder的性能提升了一倍,那麼在.NET 4.5中StringBuilder的Append方法又作了什麼呢,下面咱們來看一下.Net 4.5中Append的具體實現過程

 // Appends `value` to the builder and returns this builder (listing for .NET 4.5).
 // A null `value` is ignored. When the new chunk length stays below the length
 // of m_ChunkChars the characters are copied straight into that array;
 // otherwise the work is delegated to AppendHelper.
 public unsafe StringBuilder Append(string value)
        {
            if (value != null)
            {
                // Character array maintained inside the StringBuilder
                char[] chunkChars = this.m_ChunkChars;
                int chunkLength = this.m_ChunkLength;
                int length = value.Length;
                int num = chunkLength + length;

                // No need to grow the m_ChunkChars array
                if (num < chunkChars.Length)
                {
                    // Strings of at most two characters are copied element by element.
                    if (length <= 2)
                    {
                        if (length > 0)
                        {
                            chunkChars[chunkLength] = value[0];
                        }
                        if (length > 1)
                        {
                            chunkChars[chunkLength + 1] = value[1];
                        }
                    }
                    else
                    {
                        // Longer strings: pin the string and the target array and
                        // copy `length` chars with string.wstrcpy.
                        fixed (string text = value)
                        {
                            char* ptr = text;
                            if (ptr != null)
                            {
                                // Move from the start of the string object to its first
                                // character; the offset is halved because ptr is a char*
                                // (presumably OffsetToStringData is expressed in bytes).
                                ptr += RuntimeHelpers.OffsetToStringData / 2;
                            }
                            fixed (char* ptr2 = &chunkChars[chunkLength])
                            {
                                string.wstrcpy(ptr2, ptr, length);
                            }
                        }
                    }
                    this.m_ChunkLength = num;
                }
                // The m_ChunkChars array has to grow
                else
                {
                    this.AppendHelper(value);
                }
            }
            return this;
        }
 // Pins `value`, obtains a pointer to its characters and forwards it, together
 // with the string's length, to Append(char*, int).
 private unsafe void AppendHelper(string value)
        {
            fixed (string text = value)
            {
                // Take the address of the string
                char* ptr = text;
                if (ptr != null)
                {
                    // Skip ahead to the first character; the offset is halved because
                    // ptr is a char* (presumably OffsetToStringData is in bytes).
                    ptr += RuntimeHelpers.OffsetToStringData / 2;
                }
                this.Append(ptr, value.Length);
            }
        }

        // Appends `valueCount` characters starting at `value` and returns this builder.
        // Throws ArgumentOutOfRangeException when `valueCount` is negative.
        // If the characters fit into m_ChunkChars they are copied there; otherwise
        // the free tail of m_ChunkChars is filled first, ExpandByABlock is called
        // for the remainder, and the rest is copied to the start of m_ChunkChars.
        public unsafe StringBuilder Append(char* value, int valueCount)
        {
            if (valueCount < 0)
            {
                throw new ArgumentOutOfRangeException("valueCount", Environment.GetResourceString("ArgumentOutOfRange_NegativeCount"));
            }
            // Chunk length after appending everything to m_ChunkChars.
            int num = valueCount + this.m_ChunkLength;
            if (num <= this.m_ChunkChars.Length)
            {
                // Copy the characters into the m_ChunkChars array
                StringBuilder.ThreadSafeCopy(value, this.m_ChunkChars, this.m_ChunkLength, valueCount);
                this.m_ChunkLength = num;
            }
            else
            {
                // num2: free slots left in m_ChunkChars.
                int num2 = this.m_ChunkChars.Length - this.m_ChunkLength;
                if (num2 > 0)
                {
                    // Fill m_ChunkChars up to its end first.
                    StringBuilder.ThreadSafeCopy(value, this.m_ChunkChars, this.m_ChunkLength, num2);
                    this.m_ChunkLength = this.m_ChunkChars.Length;
                }
                // num3: characters that did not fit.
                int num3 = valueCount - num2;
                // Presumably ExpandByABlock installs a fresh m_ChunkChars with room for
                // num3 characters (helper not shown); the remainder is then written
                // from index 0 and becomes the new chunk length.
                this.ExpandByABlock(num3);
                StringBuilder.ThreadSafeCopy(value + num2, this.m_ChunkChars, 0, num3);
                this.m_ChunkLength = num3;
            }
            return this;
        }
View Code

分析代碼可知,.Net 4.5的StringBuilder內部維護了一個m_ChunkChars字符數組,來避免不斷擴容、不斷複製的過程所形成的性能消耗,因此StringBuilder的性能又成爲三者中最高的一個。

看了老趙blog以後,(此處省去一千溢美之詞)——只想說一句:「我對閣下的景仰有如滔滔江水,連綿不絕,又如黃河氾濫,一發而不可收拾!」

相關文章
相關標籤/搜索