Demo:https://github.com/caozhiyuan/ClrProfiler.Trace
爲了實現自動、無依賴地跟蹤分析應用程序性能(達到商業級APM效果),作者希望能動態修改應用字節碼。在相關調研之後,決定採用Profiler API進行實現。
作者將對.NET ClrProfiler 字節碼重寫技術進行相關闡述。
Profiler是微軟提供的一套跟蹤和分析應用的工具,其提供了一套API可以跟蹤和分析.NET程序運行狀況。其原理架構圖如下:
本文所使用的方式是直接對方法字節碼進行重寫,動態引用程序集、插入異常捕捉代碼、插入執行前後代碼。
其中相關基礎概念涉及CLI標準(ECMA-335),CLI標準對公共語言運行時進行了詳細的描述。
本文主要涉及到:
1. 程序集定義、引用
2. 類型定義、引用
3. 方法定義、引用
4. 操作碼
5. 簽名(此文對簽名格式舉了很多例子,可以幫助理解)
在此文中提供了入門級講解,下面我們直接進入正題。
在JIT編譯時候將會對CorProfiler類進行初始化,在此環節我們主要對監聽的事件進行訂閱和配置初始化工作,我們主要關心ModuleLoad事件。
// Initializes the profiler: obtains ICorProfilerInfo8 from the supplied
// IUnknown, subscribes to the events this profiler handles, and loads the
// trace configuration from the directory named by the ClrProfilerHome
// environment variable.
//
// Returns S_OK on success. Returns E_FAIL when the ICorProfilerInfo8 interface
// is unavailable, when the event mask cannot be set, when ClrProfilerHome is
// not set, or when the loaded configuration lists no assemblies to trace.
HRESULT STDMETHODCALLTYPE CorProfiler::Initialize(IUnknown *pICorProfilerInfoUnk)
{
    const HRESULT queryHR = pICorProfilerInfoUnk->QueryInterface(
        __uuidof(ICorProfilerInfo8),
        reinterpret_cast<void **>(&this->corProfilerInfo));
    if (FAILED(queryHR))
    {
        return E_FAIL;
    }

    const DWORD eventMask =
        COR_PRF_MONITOR_JIT_COMPILATION |
        COR_PRF_DISABLE_TRANSPARENCY_CHECKS_UNDER_FULL_TRUST | /* helps the case where this profiler is used on Full CLR */
        COR_PRF_DISABLE_INLINING |
        COR_PRF_MONITOR_MODULE_LOADS |
        COR_PRF_DISABLE_ALL_NGEN_IMAGES;

    // Without the event mask no JIT / module-load callbacks are delivered, so
    // a failure here must not be reported as a successful initialization.
    const HRESULT maskHR = this->corProfilerInfo->SetEventMask(eventMask);
    if (FAILED(maskHR))
    {
        Warn("SetEventMask Failed");
        return E_FAIL;
    }

    this->clrProfilerHomeEnvValue = GetEnvironmentValue(ClrProfilerHome);
    if (this->clrProfilerHomeEnvValue.empty())
    {
        Warn("ClrProfilerHome Not Found");
        return E_FAIL;
    }

    this->traceConfig = LoadTraceConfig(this->clrProfilerHomeEnvValue);
    if (this->traceConfig.traceAssemblies.empty())
    {
        Warn("TraceAssemblies Not Found");
        return E_FAIL;
    }

    Info("CorProfiler Initialize Success");
    return S_OK;
}
在ModuleLoadFinished後,我們主要獲取程序集的EntryPointToken(Main方法token)、運行時mscorlib.dll(.NET Framework)或System.Private.CoreLib.dll(.NET Core)程序集版本等基礎信息以供後面動態引用。
// Module-load callback. Records the entry-point token and assembly name of
// every loaded module of interest, and captures the assembly properties of the
// core library (mscorlib / System.Private.CoreLib) the first time it is seen.
//
// Always returns S_OK: failures are treated as "skip this module".
HRESULT STDMETHODCALLTYPE CorProfiler::ModuleLoadFinished(ModuleID moduleId, HRESULT hrStatus)
{
    // A module that failed to load has no usable metadata; do not inspect it.
    if (FAILED(hrStatus))
    {
        return S_OK;
    }

    auto module_info = GetModuleInfo(this->corProfilerInfo, moduleId);
    if (!module_info.IsValid() || module_info.IsWindowsRuntime())
    {
        return S_OK;
    }

    // Skip the host / build tooling assemblies.
    if (module_info.assembly.name == "dotnet"_W ||
        module_info.assembly.name == "MSBuild"_W)
    {
        return S_OK;
    }

    const auto entryPointToken = module_info.GetEntryPointToken();

    // NOTE(review): the raw pointer is stored in moduleMetaInfoMap; presumably
    // the map's owner releases it elsewhere — confirm.
    ModuleMetaInfo* module_metadata =
        new ModuleMetaInfo(entryPointToken, module_info.assembly.name);
    {
        std::lock_guard<std::mutex> guard(mapLock);
        moduleMetaInfoMap[moduleId] = module_metadata;
    }

    if (entryPointToken != mdTokenNil)
    {
        Info("Assembly:{} EntryPointToken:{}",
             ToString(module_info.assembly.name), entryPointToken);
    }

    if (module_info.assembly.name == "mscorlib"_W ||
        module_info.assembly.name == "System.Private.CoreLib"_W)
    {
        // Core-library properties were already captured from an earlier load.
        if (!corAssemblyProperty.szName.empty())
        {
            return S_OK;
        }

        CComPtr<IUnknown> metadata_interfaces;
        auto hr = corProfilerInfo->GetModuleMetaData(moduleId, ofRead | ofWrite,
            IID_IMetaDataImport2,
            metadata_interfaces.GetAddressOf());
        RETURN_OK_IF_FAILED(hr);

        auto pAssemblyImport = metadata_interfaces.As<IMetaDataAssemblyImport>(
            IID_IMetaDataAssemblyImport);
        if (pAssemblyImport.IsNull())
        {
            return S_OK;
        }

        mdAssembly assembly;
        hr = pAssemblyImport->GetAssemblyFromScope(&assembly);
        RETURN_OK_IF_FAILED(hr);

        // The name buffer is not requested (NULL, 0, NULL); the name is taken
        // from module_info below.
        hr = pAssemblyImport->GetAssemblyProps(
            assembly,
            &corAssemblyProperty.ppbPublicKey,
            &corAssemblyProperty.pcbPublicKey,
            &corAssemblyProperty.pulHashAlgId,
            NULL,
            0,
            NULL,
            &corAssemblyProperty.pMetaData,
            &corAssemblyProperty.assemblyFlags);
        RETURN_OK_IF_FAILED(hr);

        corAssemblyProperty.szName = module_info.assembly.name;
        return S_OK;
    }

    return S_OK;
}
下面進行方法編譯,在JITCompilationStarted時,我們會在Main方法的字節碼中插入動態加載Trace程序集的代碼(Main方法前添加Assembly.LoadFrom(path))。
在指定方法編譯時,我們需要對方法簽名進行分析,方法簽名中主要包含方法調用方式、參數個數、泛型參數個數、返回類型、參數類型集合。
在分析完方法簽名和方法名後與我們配置的方法進行匹配,如果一致則進行IL重寫。我們會將代碼修改爲如下方式:
// Original method, before IL rewriting.
private Task DataRead(string a, int b)
{
    return Task.Delay(10);
}

// C# equivalent of the method body after IL rewriting: the original body is
// wrapped in try/catch/finally, with a "before" hook at the start of the try
// block and an "end" hook in the finally block.
private Task DataReadWrapper(string a, int b)
{
    // The three locals added to the method's local-variable signature.
    object ret = null;
    Exception ex = null;
    MethodTrace methodTrace = null;
    try
    {
        // "Before" hook: receives the declaring type, the instance and the boxed arguments.
        // NOTE(review): functiontoken is not declared in this snippet; presumably it is the
        // method's metadata token emitted as a constant by the rewriter — confirm.
        methodTrace = (MethodTrace) ((TraceAgent) TraceAgent.GetInstance())
            .BeforeMethod(this.GetType(), this, new object[] {a, b}, functiontoken);

        // Original method body; its result is kept in ret instead of being returned directly.
        ret = Task.Delay(10);
        goto T;
    }
    catch (Exception e)
    {
        // Remember the exception for the "end" hook, then rethrow it unchanged.
        ex = e;
        throw;
    }
    finally
    {
        // "End" hook: runs on both the normal and the exceptional path.
        if (methodTrace != null)
        {
            methodTrace.EndMethod(ret, ex);
        }
    }

    // Single exit point reached from the goto inside the try block.
    T:
    return (Task)ret;
}
其中主要包含方法本地變量簽名重寫、方法體字節重寫(包含代碼體、異常體)。
方法本地變量簽名重寫代碼:
// add ret ex methodTrace var to local var HRESULT ModifyLocalSig(CComPtr<IMetaDataImport2>& pImport, CComPtr<IMetaDataEmit2>& pEmit, ILRewriter& reWriter, mdTypeRef exTypeRef, mdTypeRef methodTraceTypeRef) { HRESULT hr; PCCOR_SIGNATURE rgbOrigSig = NULL; ULONG cbOrigSig = 0; UNALIGNED INT32 temp = 0; if (reWriter.m_tkLocalVarSig != mdTokenNil) { IfFailRet(pImport->GetSigFromToken(reWriter.m_tkLocalVarSig, &rgbOrigSig, &cbOrigSig)); //Check Is ReWrite or not const auto len = CorSigCompressToken(methodTraceTypeRef, &temp); if(cbOrigSig - len > 0){ if(rgbOrigSig[cbOrigSig - len -1]== ELEMENT_TYPE_CLASS){ if (memcmp(&rgbOrigSig[cbOrigSig - len], &temp, len) == 0) { return E_FAIL; } } } } auto exTypeRefSize = CorSigCompressToken(exTypeRef, &temp); auto methodTraceTypeRefSize = CorSigCompressToken(methodTraceTypeRef, &temp); ULONG cbNewSize = cbOrigSig + 1 + 1 + methodTraceTypeRefSize + 1 + exTypeRefSize; ULONG cOrigLocals; ULONG cNewLocalsLen; ULONG cbOrigLocals = 0; if (cbOrigSig == 0) { cbNewSize += 2; reWriter.cNewLocals = 3; cNewLocalsLen = CorSigCompressData(reWriter.cNewLocals, &temp); } else { cbOrigLocals = CorSigUncompressData(rgbOrigSig + 1, &cOrigLocals); reWriter.cNewLocals = cOrigLocals + 3; cNewLocalsLen = CorSigCompressData(reWriter.cNewLocals, &temp); cbNewSize += cNewLocalsLen - cbOrigLocals; } const auto rgbNewSig = new COR_SIGNATURE[cbNewSize]; *rgbNewSig = IMAGE_CEE_CS_CALLCONV_LOCAL_SIG; ULONG rgbNewSigOffset = 1; memcpy(rgbNewSig + rgbNewSigOffset, &temp, cNewLocalsLen); rgbNewSigOffset += cNewLocalsLen; if (cbOrigSig > 0) { const auto cbOrigCopyLen = cbOrigSig - 1 - cbOrigLocals; memcpy(rgbNewSig + rgbNewSigOffset, rgbOrigSig + 1 + cbOrigLocals, cbOrigCopyLen); rgbNewSigOffset += cbOrigCopyLen; } rgbNewSig[rgbNewSigOffset++] = ELEMENT_TYPE_OBJECT; rgbNewSig[rgbNewSigOffset++] = ELEMENT_TYPE_CLASS; exTypeRefSize = CorSigCompressToken(exTypeRef, &temp); memcpy(rgbNewSig + rgbNewSigOffset, &temp, exTypeRefSize); rgbNewSigOffset += exTypeRefSize; 
rgbNewSig[rgbNewSigOffset++] = ELEMENT_TYPE_CLASS; methodTraceTypeRefSize = CorSigCompressToken(methodTraceTypeRef, &temp); memcpy(rgbNewSig + rgbNewSigOffset, &temp, methodTraceTypeRefSize); rgbNewSigOffset += methodTraceTypeRefSize; IfFailRet(pEmit->GetTokenFromSig(&rgbNewSig[0], cbNewSize, &reWriter.m_tkLocalVarSig)); return S_OK; }
方法體重寫主要涉及到如下數據結構:
// One IL instruction of a parsed method body. Instructions are chained into a
// doubly-linked list so that new instructions can be inserted between them.
struct ILInstr
{
    ILInstr* m_pNext;   // next instruction in the list
    ILInstr* m_pPrev;   // previous instruction in the list
    unsigned m_opcode;  // opcode of this instruction
    unsigned m_offset;  // NOTE(review): presumably the offset within the method body - confirm

    // Operand storage; which member is meaningful depends on the opcode.
    union
    {
        ILInstr* m_pTarget;  // NOTE(review): presumably the target instruction of a branch - confirm
        INT8 m_Arg8;
        INT16 m_Arg16;
        INT32 m_Arg32;
        INT64 m_Arg64;
    };
};

// One exception-handling clause of a method, expressed with instruction
// pointers into the ILInstr list.
struct EHClause
{
    CorExceptionFlag m_Flags;
    ILInstr* m_pTryBegin;
    ILInstr* m_pTryEnd;
    ILInstr* m_pHandlerBegin;  // First instruction inside the handler
    ILInstr* m_pHandlerEnd;    // Last instruction inside the handler
    union
    {
        DWORD m_ClassToken;    // use for type-based exception handlers
        ILInstr* m_pFilter;    // use for filter-based exception handlers
                               // (COR_ILEXCEPTION_CLAUSE_FILTER is set)
    };
};
il_rewriter.cpp會將方法體字節解析成一個雙向鏈表,便於我們在鏈表中插入字節碼。我們在方法頭指針前插入pre執行代碼,同時新建一個ret指針,在ret指針前插入catch和finally塊字節碼(需要判斷方法返回類型,進行適當拆箱處理),原ret操作碼全部改爲goto到新建的endfinally指針next處,最後我們爲原方法新增catch和finally異常處理體。這樣我們就實現了整個方法的攔截。
最後看我們TraceAgent代碼實現,我們通過Type和functiontoken獲取到MethodBase,然後通過配置獲取目標跟蹤程序集實現對方法的跟蹤和分析。
/// <summary>
/// Called at the start of an instrumented method. Looks up (and, on first use,
/// prepares) the wrapper registered for the method identified by
/// <paramref name="functionToken"/> and invokes its "before" hook.
/// </summary>
/// <param name="type">The <see cref="Type"/> that declares the instrumented method, passed as object.</param>
/// <param name="invocationTarget">The instance the method was invoked on; must not be null.</param>
/// <param name="methodArguments">The arguments the method was invoked with.</param>
/// <param name="functionToken">Token identifying the instrumented method.</param>
/// <returns>
/// The delegate to call when the method ends, or null when no wrapper is
/// available for the method.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="invocationTarget"/> is null.</exception>
public EndMethodDelegate BeforeWrappedMethod(object type,
    object invocationTarget,
    object[] methodArguments,
    uint functionToken)
{
    if (invocationTarget == null)
    {
        // ArgumentNullException takes the parameter name; the previous
        // ArgumentException(string) overload treated it as the message text.
        // It derives from ArgumentException, so existing catch clauses still match.
        throw new ArgumentNullException(nameof(invocationTarget));
    }

    var traceMethodInfo = new TraceMethodInfo
    {
        InvocationTarget = invocationTarget,
        MethodArguments = methodArguments,
        Type = (Type) type
    };

    var functionInfo = GetFunctionInfoFromCache(functionToken, traceMethodInfo);
    traceMethodInfo.MethodBase = functionInfo.MethodBase;

    // Prepare the wrapper the first time this method is seen.
    if (functionInfo.MethodWrapper == null)
    {
        PrepareMethodWrapper(functionInfo, traceMethodInfo);
    }

    // The wrapper may still be null after preparation; null is returned then.
    return functionInfo.MethodWrapper?.BeforeWrappedMethod(traceMethodInfo);
}
通過Profiler API我們動態實現了.NET應用的跟蹤和分析,並且只需配置環境變量(profiler.dll目錄等)。與傳統的dynamicproxy或手動埋點相比,其更加靈活,且無依賴。