上篇博文《特來電混沌工程實踐》中,咱們詳細介紹了特來電混沌工程實踐的設計和規劃。目前咱們已經啓動了應用層的混沌實驗。
在應用層的混沌實驗中,咱們經常需要模擬HSF服務容器的線程被打爆、CPU使用率25%、50%、75%、端口被打爆、內存泄露、服務超時、服務異常等場景。
前期咱們實現的時候,一般會選擇一個典型的HSF服務去模擬注入上述混沌事件場景。可是每次注入都會遇到準備時間長、耗時長、控制複雜這些問題。
後來和阿里的中亭老師交流,受到了啓發:咱們應該寫一個混沌事件注入工具,然後根據混沌實驗場景,靈活地注入混沌事件。
所以,咱們啓動了混沌實驗注入工具的研發。先說一下具體的思路吧:
1、統一混沌事件的注入接口,實現各種混沌事件注入
1. 先定義混沌事件注入接口IChaosEvent,包含兩個方法:Inject(注入)和Stop(停止)
/// <summary>
/// Contract implemented by every chaos event that the injector can start and stop.
/// </summary>
interface IChaosEvent
{
    /// <summary>
    /// Starts the chaos event.
    /// </summary>
    /// <param name="context">
    /// Event-specific settings as string key/value pairs. May be null: the injector's
    /// SingletonInject passes null when no context is supplied.
    /// </param>
    void Inject(Dictionary<string, string> context);

    /// <summary>
    /// Stops the chaos event.
    /// </summary>
    void Stop();
}
同時增加一個混沌事件枚舉ChaosEventType:
/// <summary>
/// Identifies which chaos event to inject. The numeric values are spelled out
/// and match the implicit declaration-order values.
/// </summary>
public enum ChaosEventType
{
    /// <summary>CPU load, 25% variant.</summary>
    CPU25 = 0,

    /// <summary>CPU load, 50% variant.</summary>
    CPU50 = 1,

    /// <summary>CPU load, 75% variant.</summary>
    CPU75 = 2,

    /// <summary>Service call times out.</summary>
    ServiceTimeout = 3,

    /// <summary>Service call throws.</summary>
    ServiceException = 4,

    /// <summary>Memory leak.</summary>
    Memory = 5,

    /// <summary>Thread exhaustion.</summary>
    Threads = 6,

    /// <summary>Port exhaustion.</summary>
    Ports = 7
}
2. 實現各種混沌事件注入
HighCpu-25%CPU使用率
/// <summary>
/// Chaos event that drives CPU load by busy-spinning dedicated tasks, sized to
/// 25% of <see cref="Environment.ProcessorCount"/>.
/// </summary>
class Chaos_HighCPU25 : IChaosEvent
{
    CancellationTokenSource cts;

    public Chaos_HighCPU25()
    {
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Starts the spinning tasks. Best-effort: failures are traced, never thrown.
    /// </summary>
    /// <param name="context">Not used by this event.</param>
    public void Inject(Dictionary<string, string> context)
    {
        try
        {
            // The same instance may be injected again after Stop(). A task created with an
            // already-cancelled token cannot be started (Start throws), so re-arm first.
            if (cts.IsCancellationRequested)
            {
                cts = new CancellationTokenSource();
            }

            // Capture this round's token so each spinner stops with the round that created it,
            // even if the field is replaced by a later Inject.
            var token = cts.Token;

            // Fractional bound: "i < count" runs the loop ceil(count) times, i.e. at least once.
            var count = (25 / 100.0) * Environment.ProcessorCount;
            for (int i = 0; i < count; i++)
            {
                var cpuTask = new Task(() =>
                {
                    while (!token.IsCancellationRequested)
                    {
                        // Intentional busy loop: burning CPU is the purpose of this event.
                    }
                }, token, TaskCreationOptions.LongRunning);
                cpuTask.Start();
            }
        }
        catch (Exception ex)
        {
            // Chaos injection must never break the host service; record instead of swallowing silently.
            System.Diagnostics.Trace.TraceError("Chaos_HighCPU25.Inject failed: " + ex);
        }
    }

    /// <summary>
    /// Signals every spinning task started by the last <see cref="Inject"/> to exit.
    /// </summary>
    public void Stop()
    {
        cts.Cancel();
    }
}
HighCpu-50%CPU使用率
/// <summary>
/// Chaos event that drives CPU load by busy-spinning dedicated tasks, sized to
/// 50% of <see cref="Environment.ProcessorCount"/>.
/// </summary>
class Chaos_HighCPU50 : IChaosEvent
{
    CancellationTokenSource cts;

    public Chaos_HighCPU50()
    {
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Starts the spinning tasks. Best-effort: failures are traced, never thrown.
    /// </summary>
    /// <param name="context">Not used by this event.</param>
    public void Inject(Dictionary<string, string> context)
    {
        try
        {
            // The same instance may be injected again after Stop(). A task created with an
            // already-cancelled token cannot be started (Start throws), so re-arm first.
            if (cts.IsCancellationRequested)
            {
                cts = new CancellationTokenSource();
            }

            // Capture this round's token so each spinner stops with the round that created it,
            // even if the field is replaced by a later Inject.
            var token = cts.Token;

            // Fractional bound: "i < count" runs the loop ceil(count) times, i.e. at least once.
            var count = (50 / 100.0) * Environment.ProcessorCount;
            for (int i = 0; i < count; i++)
            {
                var cpuTask = new Task(() =>
                {
                    while (!token.IsCancellationRequested)
                    {
                        // Intentional busy loop: burning CPU is the purpose of this event.
                    }
                }, token, TaskCreationOptions.LongRunning);
                cpuTask.Start();
            }
        }
        catch (Exception ex)
        {
            // Chaos injection must never break the host service; record instead of swallowing silently.
            System.Diagnostics.Trace.TraceError("Chaos_HighCPU50.Inject failed: " + ex);
        }
    }

    /// <summary>
    /// Signals every spinning task started by the last <see cref="Inject"/> to exit.
    /// </summary>
    public void Stop()
    {
        cts.Cancel();
    }
}
HighCpu-75%CPU使用率
/// <summary>
/// Chaos event that drives CPU load by busy-spinning dedicated tasks, sized to
/// 75% of <see cref="Environment.ProcessorCount"/>.
/// </summary>
class Chaos_HighCPU75 : IChaosEvent
{
    CancellationTokenSource cts;

    public Chaos_HighCPU75()
    {
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Starts the spinning tasks. Best-effort: failures are traced, never thrown.
    /// </summary>
    /// <param name="context">Not used by this event.</param>
    public void Inject(Dictionary<string, string> context)
    {
        try
        {
            // The same instance may be injected again after Stop(). A task created with an
            // already-cancelled token cannot be started (Start throws), so re-arm first.
            if (cts.IsCancellationRequested)
            {
                cts = new CancellationTokenSource();
            }

            // Capture this round's token so each spinner stops with the round that created it,
            // even if the field is replaced by a later Inject.
            var token = cts.Token;

            // Fractional bound: "i < count" runs the loop ceil(count) times, i.e. at least once.
            var count = (75 / 100.0) * Environment.ProcessorCount;
            for (int i = 0; i < count; i++)
            {
                var cpuTask = new Task(() =>
                {
                    while (!token.IsCancellationRequested)
                    {
                        // Intentional busy loop: burning CPU is the purpose of this event.
                    }
                }, token, TaskCreationOptions.LongRunning);
                cpuTask.Start();
            }
        }
        catch (Exception ex)
        {
            // Chaos injection must never break the host service; record instead of swallowing silently.
            System.Diagnostics.Trace.TraceError("Chaos_HighCPU75.Inject failed: " + ex);
        }
    }

    /// <summary>
    /// Signals every spinning task started by the last <see cref="Inject"/> to exit.
    /// </summary>
    public void Stop()
    {
        cts.Cancel();
    }
}
內存泄露-2G
/// <summary>
/// Chaos event that simulates a memory leak by parking large strings in a static list.
/// The target size in MB is read from the "Chaos.MemoryMB" app setting (default "2000").
/// </summary>
class Chaos_Memory : IChaosEvent
{
    CancellationTokenSource cts;

    // Building block of exactly 1024 characters. It replaces a hand-typed literal of '1'
    // characters so the size arithmetic below is verifiable; the content is never read.
    static string OneKB = new string('1', 1024);

    // Holds the "leaked" strings so they stay reachable.
    static List<string> list = new List<string>();

    // The allocating task and Stop() run on different threads and both touch the list.
    static readonly object listLock = new object();

    public Chaos_Memory()
    {
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Starts a background task that allocates the configured amount of memory.
    /// Best-effort: failures are traced, never thrown.
    /// </summary>
    /// <param name="context">Not used by this event.</param>
    public void Inject(Dictionary<string, string> context)
    {
        try
        {
            var count = System.Configuration.ConfigurationManager.AppSettings["Chaos.MemoryMB"];
            if (count == null)
            {
                count = "2000";
            }

            int c;
            if (!int.TryParse(count, out c))
            {
                return;
            }

            // The same instance may be injected again after Stop(). A task created with an
            // already-cancelled token cannot be started (Start throws), so re-arm first.
            if (cts.IsCancellationRequested)
            {
                cts = new CancellationTokenSource();
            }
            var token = cts.Token;

            Task task = new Task(() =>
            {
                // Each entry is 1024 * 1024 chars; at 2 bytes per UTF-16 char that is ~2 MB,
                // hence c / 2 entries for c MB.
                for (int k = 0; k < c / 2; k++)
                {
                    if (token.IsCancellationRequested)
                    {
                        return;
                    }

                    StringBuilder builder = new StringBuilder(OneKB.Length * 1024);
                    for (int i = 0; i < 1024; i++)
                    {
                        builder.Append(OneKB);
                    }
                    string chunk = builder.ToString();

                    lock (listLock)
                    {
                        // Re-check under the lock so nothing is added after Stop() has released the list.
                        if (token.IsCancellationRequested)
                        {
                            return;
                        }
                        list.Add(chunk);
                    }
                }
            }, token, TaskCreationOptions.LongRunning);
            task.Start();
        }
        catch (Exception ex)
        {
            // Chaos injection must never break the host service; record instead of swallowing silently.
            System.Diagnostics.Trace.TraceError("Chaos_Memory.Inject failed: " + ex);
        }
    }

    /// <summary>
    /// Stops the allocating task and drops every string it accumulated.
    /// </summary>
    public void Stop()
    {
        cts.Cancel();
        lock (listLock)
        {
            // Dropping the reference makes all accumulated strings collectable.
            list = new List<string>();
        }
    }
}
端口被打爆:
/// <summary>
/// Chaos event that exhausts ports by opening many TCP connections to one server
/// and holding them open until stopped.
/// </summary>
class Chaos_Ports : IChaosEvent
{
    CancellationTokenSource cts;

    // Open connections. Never null: Stop() swaps in a fresh list instead of nulling the field,
    // so a later Inject on the same cached instance keeps working.
    static List<Socket> sockets = new List<Socket>();

    // The connecting task and Stop() run on different threads and both touch the list.
    static readonly object socketsLock = new object();

    public Chaos_Ports()
    {
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Starts a background task that opens the requested number of connections.
    /// Best-effort: failures are traced, never thrown.
    /// </summary>
    /// <param name="context">
    /// Must contain "Count" (number of connections) and "Server" in "host:port" form.
    /// </param>
    public void Inject(Dictionary<string, string> context)
    {
        try
        {
            var count = Convert.ToInt32(context["Count"]);
            var server = Convert.ToString(context["Server"]);
            var sp = server.Split(':');
            var host = sp[0];
            // Parsed once up front: a malformed port now fails the whole injection here
            // instead of failing identically on every loop iteration.
            var port = Convert.ToInt32(sp[1]);

            // The same instance may be injected again after Stop(); re-arm the cancelled source.
            if (cts.IsCancellationRequested)
            {
                cts = new CancellationTokenSource();
            }
            var token = cts.Token;

            Task.Factory.StartNew(() =>
            {
                for (int i = 0; i < count && !token.IsCancellationRequested; i++)
                {
                    Socket socket = null;
                    try
                    {
                        socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
                        socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);
                        socket.Connect(host, port);

                        try
                        {
                            SetKeepAliveValues(socket, true, 36000000, 1000);
                        }
                        catch (Exception)
                        {
                            // Keep-alive tuning is optional; the connection is already open and still counts.
                        }

                        lock (socketsLock)
                        {
                            if (!token.IsCancellationRequested)
                            {
                                sockets.Add(socket);
                                socket = null; // ownership moved to the list
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // A failed connection attempt is expected once the target is saturated; try the next one.
                    }
                    finally
                    {
                        // Not handed over to the list (connect failed or stop raced in): do not leak it.
                        if (socket != null)
                        {
                            socket.Close();
                        }
                    }
                }

                // Keep the task alive until Stop(); wakes immediately on cancellation.
                token.WaitHandle.WaitOne();
            }, TaskCreationOptions.LongRunning);
        }
        catch (Exception ex)
        {
            // Chaos injection must never break the host service; record instead of swallowing silently.
            System.Diagnostics.Trace.TraceError("Chaos_Ports.Inject failed: " + ex);
        }
    }

    /// <summary>
    /// Cancels the connecting task and closes every connection opened so far.
    /// </summary>
    public void Stop()
    {
        cts.Cancel();

        List<Socket> toClose;
        lock (socketsLock)
        {
            // Detach the list under the lock, then close outside it.
            toClose = sockets;
            sockets = new List<Socket>();
        }

        foreach (var socket in toClose)
        {
            try
            {
                socket.Close();
            }
            catch
            {
                // Closing is best-effort; continue with the remaining sockets.
            }
        }
    }

    /// <summary>
    /// Applies TCP keep-alive settings to a socket through
    /// <see cref="IOControlCode.KeepAliveValues"/>.
    /// </summary>
    /// <param name="Socket">Socket to configure.</param>
    /// <param name="On_Off">Whether keep-alive is enabled.</param>
    /// <param name="KeepaLiveTime">Keep-alive time value passed through unchanged.</param>
    /// <param name="KeepaLiveInterval">Keep-alive interval value passed through unchanged.</param>
    /// <returns>The value returned by <c>Socket.IOControl</c>.</returns>
    public int SetKeepAliveValues
        (
            System.Net.Sockets.Socket Socket,
            bool On_Off,
            uint KeepaLiveTime,
            uint KeepaLiveInterval
        )
    {
        // Three consecutive 32-bit values in machine byte order: the same 12 bytes
        // that the TcpKeepAlive overlay struct yields, built without unsafe code.
        byte[] InValue = new byte[12];
        BitConverter.GetBytes(Convert.ToUInt32(On_Off)).CopyTo(InValue, 0);
        BitConverter.GetBytes(KeepaLiveTime).CopyTo(InValue, 4);
        BitConverter.GetBytes(KeepaLiveInterval).CopyTo(InValue, 8);

        return Socket.IOControl(IOControlCode.KeepAliveValues, InValue, null);
    }
}

/// <summary>
/// Explicit-layout overlay of the 12-byte keep-alive argument: three 32-bit fields
/// at offsets 0, 4 and 8, aliased by a 12-byte buffer at offset 0.
/// </summary>
[
    System.Runtime.InteropServices.StructLayout
    (
        System.Runtime.InteropServices.LayoutKind.Explicit
    )
]
unsafe struct TcpKeepAlive
{
    [System.Runtime.InteropServices.FieldOffset(0)]
    [
        System.Runtime.InteropServices.MarshalAs
        (
            System.Runtime.InteropServices.UnmanagedType.ByValArray,
            SizeConst = 12
        )
    ]
    public fixed byte Bytes[12];

    [System.Runtime.InteropServices.FieldOffset(0)]
    public uint On_Off;

    [System.Runtime.InteropServices.FieldOffset(4)]
    public uint KeepaLiveTime;

    [System.Runtime.InteropServices.FieldOffset(8)]
    public uint KeepaLiveInterval;
}
線程被打爆:
/// <summary>
/// Chaos event that exhausts threads by starting many tasks that each block
/// for up to 120 x 10 seconds.
/// </summary>
class Chaos_Threads : IChaosEvent
{
    CancellationTokenSource cts;
    List<Task> tasks = new List<Task>();

    public Chaos_Threads()
    {
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Starts the blocking tasks. Best-effort: failures are traced, never thrown.
    /// </summary>
    /// <param name="context">Must contain "Threads": the number of blocking tasks to start.</param>
    public void Inject(Dictionary<string, string> context)
    {
        try
        {
            var count = context["Threads"];
            int c;
            if (!Int32.TryParse(count, out c))
            {
                return;
            }

            // The same instance may be injected again after Stop(). A task created with an
            // already-cancelled token cannot be started (Start throws), so re-arm first.
            if (cts.IsCancellationRequested)
            {
                cts = new CancellationTokenSource();
            }
            var token = cts.Token;

            for (int i = 0; i < c; i++)
            {
                var task = new Task(() =>
                {
                    for (int j = 0; j < 120; j++)
                    {
                        // Blocks the thread for 10 s like a sleep, but returns true as soon as
                        // the token is cancelled, so Stop() takes effect immediately.
                        if (token.WaitHandle.WaitOne(10 * 1000))
                        {
                            return;
                        }
                    }
                }, token);
                task.Start();

                lock (tasks)
                {
                    tasks.Add(task);
                }
            }
        }
        catch (Exception ex)
        {
            // Chaos injection must never break the host service; record instead of swallowing silently.
            System.Diagnostics.Trace.TraceError("Chaos_Threads.Inject failed: " + ex);
        }
    }

    /// <summary>
    /// Cancels all blocking tasks and forgets them.
    /// </summary>
    public void Stop()
    {
        cts.Cancel();

        lock (tasks)
        {
            foreach (var task in tasks)
            {
                // Dispose throws on a task that has not finished yet; those are simply left to complete.
                if (task.IsCompleted)
                {
                    task.Dispose();
                }
            }

            // Drop references so the list does not grow across inject/stop cycles.
            tasks.Clear();
        }
    }
}
服務調用異常:
/// <summary>
/// Chaos event that makes a service call fail: every <see cref="Inject"/> throws
/// until <see cref="Stop"/> has been called.
/// </summary>
class Chaos_ServiceException : IChaosEvent
{
    // Set once by Stop(); never reset afterwards.
    private bool stopped;

    public Chaos_ServiceException()
    {
    }

    /// <summary>
    /// Throws an <see cref="Exception"/> unless the event has been stopped.
    /// </summary>
    /// <param name="context">Not used by this event.</param>
    public void Inject(Dictionary<string, string> context)
    {
        if (stopped)
        {
            return;
        }

        throw new Exception("Chaos_ServiceException");
    }

    /// <summary>
    /// Disables the event; later <see cref="Inject"/> calls do nothing.
    /// </summary>
    public void Stop()
    {
        stopped = true;
    }
}
服務調用超時:
/// <summary>
/// Chaos event that simulates a slow service: every <see cref="Inject"/> blocks the
/// caller for 10 seconds until <see cref="Stop"/> has been called.
/// </summary>
class Chaos_ServiceTimeout : IChaosEvent
{
    CancellationTokenSource cts;
    bool isStop = false;

    public Chaos_ServiceTimeout()
    {
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Blocks the calling thread for 10 seconds, or until the event is stopped.
    /// </summary>
    /// <param name="context">Not used by this event.</param>
    public void Inject(Dictionary<string, string> context)
    {
        if (isStop)
        {
            return;
        }

        // Waiting on the token's handle returns early when Stop() cancels, without throwing.
        // The previous Task.Delay(...).Wait() surfaced an AggregateException to every in-flight
        // caller the moment the chaos event was stopped.
        cts.Token.WaitHandle.WaitOne(10 * 1000);
    }

    /// <summary>
    /// Disables the event and releases callers currently blocked in <see cref="Inject"/>.
    /// </summary>
    public void Stop()
    {
        // Flag first so new callers skip the wait, then wake the ones already waiting.
        isStop = true;
        cts.Cancel();
    }
}
2、設計一個統一的混沌事件注入器,支持各種混沌事件注入,支持混沌事件的熱更新和取消
1. ChaosEventInjecter
支持混沌事件接口實現的建立、混沌事件注入(全局注入一次,每次調用都注入)、混沌事件取消(中止)
混沌事件接口實現的建立
/// <summary>
/// Returns the cached chaos event for the given type, creating and caching it on first use.
/// </summary>
/// <param name="chaosEventType">Type of chaos event to look up.</param>
/// <returns>
/// The event instance, or null when the type has no implementation. Callers already test
/// for null; previously an unmapped type threw KeyNotFoundException instead.
/// </returns>
private IChaosEvent GetOrCreateChaosEvent(ChaosEventType chaosEventType)
{
    IChaosEvent chaosEvent;
    if (eventDic.TryGetValue(chaosEventType, out chaosEvent))
    {
        return chaosEvent;
    }

    lock (syncObj)
    {
        // Re-check: another thread may have created the event while this one waited for the lock.
        if (eventDic.TryGetValue(chaosEventType, out chaosEvent))
        {
            return chaosEvent;
        }

        switch (chaosEventType)
        {
            case ChaosEventType.CPU75:
                chaosEvent = new Chaos_HighCPU75();
                break;
            case ChaosEventType.CPU50:
                chaosEvent = new Chaos_HighCPU50();
                break;
            case ChaosEventType.CPU25:
                chaosEvent = new Chaos_HighCPU25();
                break;
            case ChaosEventType.Memory:
                chaosEvent = new Chaos_Memory();
                break;
            case ChaosEventType.Threads:
                chaosEvent = new Chaos_Threads();
                break;
            case ChaosEventType.ServiceException:
                chaosEvent = new Chaos_ServiceException();
                break;
            case ChaosEventType.ServiceTimeout:
                chaosEvent = new Chaos_ServiceTimeout();
                break;
            case ChaosEventType.Ports:
                chaosEvent = new Chaos_Ports();
                break;
            default:
                return null;
        }

        eventDic.TryAdd(chaosEventType, chaosEvent);
        return chaosEvent;
    }
}
混沌事件注入(全局注入一次,每次調用都注入)
// Lock used when creating the singleton and when creating cached chaos events.
private static readonly object syncObj = new object();

// Lock used by SingletonInject so each event type is injected only once.
private static readonly object eventObj = new object();

private static ChaosEventInjecter instance;

// Chaos event instances, one per event type, created on first use.
private ConcurrentDictionary<ChaosEventType, IChaosEvent> eventDic;

// Event types that have been injected through SingletonInject and not yet stopped.
private ConcurrentDictionary<ChaosEventType, ChaosEventType> triggeredEvent;

private ChaosEventInjecter()
{
    eventDic = new ConcurrentDictionary<ChaosEventType, IChaosEvent>();
    triggeredEvent = new ConcurrentDictionary<ChaosEventType, ChaosEventType>();
}
/// <summary>
/// Injects the given chaos event unless it is already recorded as triggered.
/// </summary>
/// <param name="chaosEventType">Type of chaos event to inject.</param>
/// <param name="context">Event-specific settings handed to the event; may be null.</param>
public void SingletonInject(ChaosEventType chaosEventType, Dictionary<string, string> context = null)
{
    // Cheap check before taking the lock.
    if (triggeredEvent.ContainsKey(chaosEventType))
    {
        return;
    }

    lock (eventObj)
    {
        // Check again now that the lock is held.
        if (triggeredEvent.ContainsKey(chaosEventType))
        {
            return;
        }

        var target = GetOrCreateChaosEvent(chaosEventType);
        if (target == null)
        {
            return;
        }

        target.Inject(context);
        triggeredEvent.TryAdd(chaosEventType, chaosEventType);
    }
}
按服務每次調用都注入
/// <summary>
/// Applies the chaos events configured for the given services. ServiceException and
/// ServiceTimeout are injected on every call; every other type goes through SingletonInject.
/// </summary>
/// <param name="serviceId">Ids of the services involved in the current call.</param>
public void ServiceInject(List<string> serviceId)
{
    var manager = ChaosEventManager.GetIntance();

    // Hand the manager a callback to this injector's StopInject.
    manager.StopInject = StopInject;

    if (manager.IsEmpty())
    {
        StopInject();
    }

    foreach (var service in serviceId)
    {
        var configured = manager.GetChaosEvent(service);
        if (configured == null)
        {
            continue;
        }

        var type = configured.ChaosEventType;
        bool perCall = type == ChaosEventType.ServiceException
                       || type == ChaosEventType.ServiceTimeout;

        if (perCall)
        {
            Inject(type, configured.ChaosValue);
        }
        else
        {
            SingletonInject(type, configured.ChaosValue);
        }
    }
}
中止混沌注入
/// <summary>
/// Stops every chaos event recorded as triggered and clears that record.
/// </summary>
public void StopInject()
{
    // "||", not "&&": with "&&" a null dictionary threw NullReferenceException and an
    // empty one never took the early exit.
    if (triggeredEvent == null || triggeredEvent.Count == 0)
    {
        return;
    }

    foreach (var chaosEventType in triggeredEvent)
    {
        var chaosEvent = GetOrCreateChaosEvent(chaosEventType.Key);
        if (chaosEvent == null)
        {
            // Skip this entry only; returning here left the remaining events running
            // and the triggered record uncleared.
            continue;
        }

        chaosEvent.Stop();
    }

    triggeredEvent = new ConcurrentDictionary<ChaosEventType, ChaosEventType>();
}
完整的ChaosEventInjecter代碼:
/// <summary>
/// Chaos event injector. Creates and caches chaos events, injects them either once
/// (<see cref="SingletonInject(ChaosEventType, Dictionary{string, string})"/>) or on every
/// call (<see cref="Inject"/>), and stops them again (<see cref="StopInject"/>).
/// </summary>
public class ChaosEventInjecter
{
    // Lock used when creating the singleton and when creating cached chaos events.
    private static readonly object syncObj = new object();

    // Lock used by SingletonInject so each event type is injected only once.
    private static readonly object eventObj = new object();

    private static ChaosEventInjecter instance;

    // Chaos event instances, one per event type, created on first use.
    private ConcurrentDictionary<ChaosEventType, IChaosEvent> eventDic;

    // Event types that have been injected through SingletonInject and not yet stopped.
    private ConcurrentDictionary<ChaosEventType, ChaosEventType> triggeredEvent;

    private ChaosEventInjecter()
    {
        eventDic = new ConcurrentDictionary<ChaosEventType, IChaosEvent>();
        triggeredEvent = new ConcurrentDictionary<ChaosEventType, ChaosEventType>();
    }

    /// <summary>
    /// Returns the process-wide injector, creating it on first use.
    /// </summary>
    public static ChaosEventInjecter GetIntance()
    {
        if (instance == null)
        {
            lock (syncObj)
            {
                if (instance == null)
                {
                    instance = new ChaosEventInjecter();
                }
            }
        }
        return instance;
    }

    /// <summary>
    /// Injects the chaos event named by the "Chaos.Event" app setting, if that setting exists.
    /// </summary>
    /// <exception cref="ArgumentException">The setting is not a ChaosEventType name.</exception>
    public void SingletonInject()
    {
        var eventType = System.Configuration.ConfigurationManager.AppSettings["Chaos.Event"];
        if (eventType != null)
        {
            ChaosEventType chaosEvent = (ChaosEventType)Enum.Parse(typeof(ChaosEventType), eventType);
            SingletonInject(chaosEvent);
        }
    }

    /// <summary>
    /// Applies the chaos events configured for the given services. ServiceException and
    /// ServiceTimeout are injected on every call; every other type is injected once.
    /// </summary>
    /// <param name="serviceId">Ids of the services involved in the current call; null is treated as empty.</param>
    public void ServiceInject(List<string> serviceId)
    {
        var manager = ChaosEventManager.GetIntance();

        // Hand the manager a callback to this injector's StopInject.
        manager.StopInject = StopInject;
        if (manager.IsEmpty())
        {
            StopInject();
        }

        if (serviceId == null)
        {
            return;
        }

        foreach (var service in serviceId)
        {
            var chaosEvent = manager.GetChaosEvent(service);
            if (chaosEvent == null)
            {
                continue;
            }

            switch (chaosEvent.ChaosEventType)
            {
                case ChaosEventType.ServiceException:
                case ChaosEventType.ServiceTimeout:
                    Inject(chaosEvent.ChaosEventType, chaosEvent.ChaosValue);
                    break;
                default:
                    SingletonInject(chaosEvent.ChaosEventType, chaosEvent.ChaosValue);
                    break;
            }
        }
    }

    /// <summary>
    /// Injects the given chaos event unless it is already recorded as triggered.
    /// </summary>
    /// <param name="chaosEventType">Type of chaos event to inject.</param>
    /// <param name="context">Event-specific settings handed to the event; may be null.</param>
    public void SingletonInject(ChaosEventType chaosEventType, Dictionary<string, string> context = null)
    {
        if (triggeredEvent.ContainsKey(chaosEventType))
        {
            return;
        }

        lock (eventObj)
        {
            // Check again now that the lock is held.
            if (triggeredEvent.ContainsKey(chaosEventType))
            {
                return;
            }

            var chaosEvent = GetOrCreateChaosEvent(chaosEventType);
            if (chaosEvent == null)
            {
                return;
            }

            chaosEvent.Inject(context);
            triggeredEvent.TryAdd(chaosEventType, chaosEventType);
        }
    }

    /// <summary>
    /// Stops every chaos event recorded as triggered and clears that record.
    /// </summary>
    public void StopInject()
    {
        // "||", not "&&": with "&&" a null dictionary threw NullReferenceException and an
        // empty one never took the early exit.
        if (triggeredEvent == null || triggeredEvent.Count == 0)
        {
            return;
        }

        foreach (var chaosEventType in triggeredEvent)
        {
            var chaosEvent = GetOrCreateChaosEvent(chaosEventType.Key);
            if (chaosEvent == null)
            {
                // Skip this entry only; returning here left the remaining events running.
                continue;
            }

            chaosEvent.Stop();
        }

        triggeredEvent = new ConcurrentDictionary<ChaosEventType, ChaosEventType>();
    }

    /// <summary>
    /// Injects the given chaos event unconditionally (no "already triggered" bookkeeping).
    /// </summary>
    /// <param name="chaosEventType">Type of chaos event to inject.</param>
    /// <param name="context">Event-specific settings handed to the event; may be null.</param>
    public void Inject(ChaosEventType chaosEventType, Dictionary<string, string> context = null)
    {
        var chaosEvent = GetOrCreateChaosEvent(chaosEventType);
        if (chaosEvent == null)
        {
            return;
        }

        chaosEvent.Inject(context);
    }

    /// <summary>
    /// Returns the cached chaos event for the given type, creating and caching it on first use.
    /// </summary>
    /// <returns>
    /// The event instance, or null when the type has no implementation. Previously an
    /// unmapped type threw KeyNotFoundException, although every caller tests for null.
    /// </returns>
    private IChaosEvent GetOrCreateChaosEvent(ChaosEventType chaosEventType)
    {
        IChaosEvent chaosEvent;
        if (eventDic.TryGetValue(chaosEventType, out chaosEvent))
        {
            return chaosEvent;
        }

        lock (syncObj)
        {
            // Re-check: another thread may have created the event while this one waited for the lock.
            if (eventDic.TryGetValue(chaosEventType, out chaosEvent))
            {
                return chaosEvent;
            }

            switch (chaosEventType)
            {
                case ChaosEventType.CPU75:
                    chaosEvent = new Chaos_HighCPU75();
                    break;
                case ChaosEventType.CPU50:
                    chaosEvent = new Chaos_HighCPU50();
                    break;
                case ChaosEventType.CPU25:
                    chaosEvent = new Chaos_HighCPU25();
                    break;
                case ChaosEventType.Memory:
                    chaosEvent = new Chaos_Memory();
                    break;
                case ChaosEventType.Threads:
                    chaosEvent = new Chaos_Threads();
                    break;
                case ChaosEventType.ServiceException:
                    chaosEvent = new Chaos_ServiceException();
                    break;
                case ChaosEventType.ServiceTimeout:
                    chaosEvent = new Chaos_ServiceTimeout();
                    break;
                case ChaosEventType.Ports:
                    chaosEvent = new Chaos_Ports();
                    break;
                default:
                    return null;
            }

            eventDic.TryAdd(chaosEventType, chaosEvent);
            return chaosEvent;
        }
    }
}
2. ChaosEventManager
混沌事件管理類,負責從Redis中實時獲取每一個服務配置的混沌事件,並支持每10秒定時更新一次混沌事件:
/// <summary>
/// Keeps an in-memory copy of the per-service chaos events stored in the "ChaosEvents"
/// hash of the cache, and refreshes that copy every 10 seconds on a background task.
/// </summary>
class ChaosEventManager
{
    private static object syncObj = new object();
    private static ChaosEventManager instance;

    // Current snapshot, keyed by service id. Replaced wholesale on every refresh;
    // volatile so readers on other threads pick up the new snapshot.
    private volatile ConcurrentDictionary<string, ChaosEvent> eventDic;

    CacheService service = CacheService.GetInstance("DefaultPool");

    /// <summary>
    /// Callback invoked when a refresh finds no configured chaos events.
    /// </summary>
    public Action StopInject { get; set; }

    private ChaosEventManager()
    {
        eventDic = new ConcurrentDictionary<string, ChaosEvent>();
        GetAllChaosEvents();
        StartUpdateTask();
    }

    /// <summary>
    /// Starts the refresh loop; if the loop faults, a new one is started.
    /// </summary>
    private void StartUpdateTask()
    {
        var task = new Task(() =>
        {
            while (true)
            {
                Thread.Sleep(10000);
                GetAllChaosEvents();
            }
        }, TaskCreationOptions.LongRunning);

        task.ContinueWith((t) =>
        {
            if (t.IsFaulted)
            {
                StartUpdateTask();
            }
        });

        task.Start();
    }

    /// <summary>
    /// Returns the process-wide manager, creating it on first use.
    /// </summary>
    public static ChaosEventManager GetIntance()
    {
        if (instance == null)
        {
            lock (syncObj)
            {
                if (instance == null)
                {
                    instance = new ChaosEventManager();
                }
            }
        }
        return instance;
    }

    /// <summary>
    /// Returns the chaos event configured for a service, or null when there is none.
    /// </summary>
    /// <param name="serviceId">Service id used as the hash key.</param>
    public ChaosEvent GetChaosEvent(string serviceId)
    {
        ChaosEvent found;
        // Single lookup: ContainsKey followed by the indexer could throw if the entry
        // vanished in between.
        return eventDic.TryGetValue(serviceId, out found) ? found : null;
    }

    /// <summary>
    /// True when no chaos event is currently configured.
    /// </summary>
    public bool IsEmpty()
    {
        var snapshot = eventDic;
        return snapshot == null || snapshot.Count == 0;
    }

    /// <summary>
    /// Reloads all chaos events from the cache and publishes them as the new snapshot.
    /// </summary>
    private void GetAllChaosEvents()
    {
        var newEventDic = new ConcurrentDictionary<string, ChaosEvent>();
        using (var client = service.GetClient())
        {
            List<string> keys = client.GetHashKeys("ChaosEvents");
            if (keys != null)
            {
                foreach (var key in keys)
                {
                    newEventDic.TryAdd(key, client.GetValueFromHash<ChaosEvent>("ChaosEvents", key));
                }
            }
        }

        // Replace instead of merge: merging never removed entries that were deleted from the
        // cache, so cancelled events kept being returned and IsEmpty() never became true again.
        eventDic = newEventDic;

        if (newEventDic.Count == 0)
        {
            var stop = StopInject;
            if (stop != null)
            {
                stop();
            }
        }
    }
}
3、在HSF、API網關、中間件SDK層面依賴注入混沌事件注入器
在HSF服務調用時增加混沌實驗事件AOP注入
API網關、中間件SDK採用類似的方法進行注入。
同時咱們設計了一個混沌事件注入工具:支持混沌事件實時注入、取消、仿真模擬執行:
分享:仿真模擬執行的效果,例如CPU25%使用率:
以上工具和設計思路,分享給你們。
周國慶
2019/3/30