memwatch 是一個 C++ 擴展,主要用來觀察 Node.js 的內存泄露問題,基本用法如下:
// Minimal HeapDiff walkthrough: snapshot the heap, allocate, then diff.
const memwatch = require('@airbnb/memwatch');

// Empty constructor; 10000 instances of it are allocated between the two snapshots.
function LeakingClass() {}

// Ask V8 to collect before measuring.
memwatch.gc();

const arr = [];
const hd = new memwatch.HeapDiff(); // constructing a HeapDiff takes the "before" snapshot
for (let i = 0; i < 10000; i += 1) {
  arr.push(new LeakingClass());
}
const hde = hd.end(); // takes the "after" snapshot and returns the comparison
console.log(JSON.stringify(hde, null, 2));
複製代碼
本文分析的版本爲 @airbnb/memwatch。首先從 binding.gyp 開始入手:
# Build description for the native addon; it declares a single target.
{
'targets': [
{
# Name of the build target.
'target_name': 'memwatch',
'include_dirs': [
# "<!(...)" runs the enclosed command; here node resolves the nan package to supply the include directory.
"<!(node -e \"require('nan')\")"
],
# C++ source files compiled into the target.
'sources': [
'src/heapdiff.cc',
'src/init.cc',
'src/memwatch.cc',
'src/util.cc'
]
}
]
}
複製代碼
這份配置表示其生成的目標是 memwatch.node,源碼是 src 目錄下的 heapdiff.cc、init.cc、memwatch.cc、util.cc。在項目編譯的過程中還需要 include 額外的 nan 目錄,nan 目錄通過執行 node -e "require('nan')" 按照 node 模塊系統尋找 nan 依賴得到,<! 表示後面是一條需要執行的指令。
memwatch 的入口函數在 init.cc 文件中,通過 NODE_MODULE(memwatch, init); 進行聲明。當執行 require('@airbnb/memwatch') 的時候會首先調用 init 函數:
// Module entry point: fills in `target` (the object handed to the addon on load)
// and registers the GC hooks.
void init (v8::Handle<v8::Object> target)
{
    Nan::HandleScope scope;

    // Expose the HeapDiff constructor on target.
    heapdiff::HeapDiff::Initialize(target);

    // Plain functions exported to JS as "upon_gc" and "gc".
    Nan::SetMethod(target, "upon_gc", memwatch::upon_gc);
    Nan::SetMethod(target, "gc", memwatch::trigger_gc);

    // before_gc is registered as GC prologue callback, after_gc as GC epilogue callback.
    Nan::AddGCPrologueCallback(memwatch::before_gc);
    Nan::AddGCEpilogueCallback(memwatch::after_gc);
}
複製代碼
init 函數的入口參數 v8::Handle<v8::Object> target 可以類比 nodejs 中 module.exports 的 exports 對象。函數內部做的事情可以分爲三塊:初始化 target、給 target 綁定 upon_gc 和 gc 兩個函數、在 nodejs 的 gc 前後分別掛上對應的鉤子函數。
到 heapdiff.cc 文件中來看 heapdiff::HeapDiff::Initialize(target); 的實現。
// Builds the JS-visible HeapDiff constructor (backed by New, with End exposed
// as the prototype method "end") and stores it on `target` under "HeapDiff".
void heapdiff::HeapDiff::Initialize ( v8::Handle<v8::Object> target )
{
    Nan::HandleScope scope;

    v8::Local<v8::FunctionTemplate> tpl = Nan::New<v8::FunctionTemplate>(New);
    v8::Local<v8::String> className = Nan::New<v8::String>("HeapDiff").ToLocalChecked();

    // One internal field is reserved on each instance.
    tpl->InstanceTemplate()->SetInternalFieldCount(1);
    tpl->SetClassName(className);
    Nan::SetPrototypeMethod(tpl, "end", End);

    target->Set(className, tpl->GetFunction());
}
複製代碼
Initialize 函數中創建了一個叫做 HeapDiff 的函數 t,同時在 t 的原型鏈上綁了 end 方法,使得 js 層面可以執行 var hp = new memwatch.HeapDiff(); hp.end()。
new memwatch.HeapDiff 實現
當 js 執行 new memwatch.HeapDiff(); 的時候,c++ 層面會執行 heapdiff::HeapDiff::New 函數。去掉註釋和不必要的宏,New 函數精簡如下:
// JS constructor for HeapDiff: wraps a new native HeapDiff in `this`, takes the
// "before" heap snapshot and returns `this`.
NAN_METHOD(heapdiff::HeapDiff::New)
{
    // Calling HeapDiff() without `new` throws a TypeError.
    if (!info.IsConstructCall()) {
        return Nan::ThrowTypeError("Use the new operator to create instances of this object.");
    }

    Nan::HandleScope scope;

    HeapDiff * diff = new HeapDiff();
    diff->Wrap(info.This());

    // s_inProgress is raised only for the duration of the snapshot
    // (presumably this is what HeapDiff::InProgress() reports — confirm).
    s_inProgress = true;
    s_startTime = time(NULL);
    v8::HeapProfiler * profiler = v8::Isolate::GetCurrent()->GetHeapProfiler();
    diff->before = profiler->TakeHeapSnapshot(NULL);
    s_inProgress = false;

    info.GetReturnValue().Set(info.This());
}
複製代碼
可以看到用戶在 js 層面執行 var hp = new memwatch.HeapDiff(); 的時候,c++ 層面會調用 nodejs 中 v8 的 api 對堆上內存打一個 snapshot 保存到 self->before 中,並將當前對象返回出去。
memwatch.HeapDiff.End 實現
當用戶執行 hp.end() 的時候,會執行原型鏈上的 end 方法,也就是 c++ 的 heapdiff::HeapDiff::End 方法。同樣去掉冗餘的註釋以及宏,End 方法可以精簡如下:
// JS method HeapDiff.prototype.end: takes the "after" snapshot, compares it
// with the "before" snapshot, deletes both snapshots and returns the comparison.
// Throws if this HeapDiff has already been ended.
NAN_METHOD(heapdiff::HeapDiff::End)
{
    Nan::HandleScope scope;

    HeapDiff * diff = Unwrap<HeapDiff>( info.This() );

    // A HeapDiff can only be ended once.
    if (diff->ended) {
        return Nan::ThrowError("attempt to end() a HeapDiff that was already ended");
    }
    diff->ended = true;

    // s_inProgress is raised only while the snapshot is being taken.
    s_inProgress = true;
    v8::HeapProfiler * profiler = v8::Isolate::GetCurrent()->GetHeapProfiler();
    diff->after = profiler->TakeHeapSnapshot(NULL);
    s_inProgress = false;

    v8::Local<Value> comparison = compare(diff->before, diff->after);

    // Both snapshots are released once the comparison has been built.
    const_cast<HeapSnapshot *>(diff->before)->Delete();
    diff->before = NULL;
    const_cast<HeapSnapshot *>(diff->after)->Delete();
    diff->after = NULL;

    info.GetReturnValue().Set(comparison);
}
複製代碼
在 End 函數中,拿到當前的 HeapDiff 對象之後,再對當前的堆上內存打一個 snapshot,調用 compare 函數對前後兩個 snapshot 進行對比得到 comparison,然後將前後兩次的 snapshot 對象釋放掉,並將結果返回給 js。
下面分析下 compare 函數的具體實現:compare 函數內部會調用 buildIDSet 函數(該函數自身是遞歸的)遍歷兩個快照,進而得到最終堆快照的 diff 結果。
// Builds the JS object describing the difference between two heap snapshots.
//
// The result `o` has three properties:
//   before / after : { nodes, size_bytes, size } for each snapshot
//   change         : { size_bytes, size, freed_nodes, allocated_nodes, details }
//
// Sizes are summed by buildIDSet over the nodes it reaches from each root.
// NOTE(review): the totals are accumulated in an `int`, so they are limited
// to the range of int.
static v8::Local<Value>
compare(const v8::HeapSnapshot * before, const v8::HeapSnapshot * after)
{
    Nan::EscapableHandleScope scope;
    int s, diffBytes;

    Local<Object> o = Nan::New<v8::Object>();

    // first let's append summary information
    Local<Object> b = Nan::New<v8::Object>();
    b->Set(Nan::New("nodes").ToLocalChecked(), Nan::New(before->GetNodesCount()));
    //b->Set(Nan::New("time"), s_startTime);
    o->Set(Nan::New("before").ToLocalChecked(), b);

    Local<Object> a = Nan::New<v8::Object>();
    a->Set(Nan::New("nodes").ToLocalChecked(), Nan::New(after->GetNodesCount()));
    //a->Set(Nan::New("time"), time(NULL));
    o->Set(Nan::New("after").ToLocalChecked(), a);

    // now let's get allocations by name
    set<uint64_t> beforeIDs, afterIDs;
    s = 0;
    buildIDSet(&beforeIDs, before->GetRoot(), s);
    b->Set(Nan::New("size_bytes").ToLocalChecked(), Nan::New(s));
    b->Set(Nan::New("size").ToLocalChecked(), Nan::New(mw_util::niceSize(s).c_str()).ToLocalChecked());

    diffBytes = s;
    s = 0;
    buildIDSet(&afterIDs, after->GetRoot(), s);
    a->Set(Nan::New("size_bytes").ToLocalChecked(), Nan::New(s));
    a->Set(Nan::New("size").ToLocalChecked(), Nan::New(mw_util::niceSize(s).c_str()).ToLocalChecked());

    // after size minus before size
    diffBytes = s - diffBytes;

    Local<Object> c = Nan::New<v8::Object>();
    c->Set(Nan::New("size_bytes").ToLocalChecked(), Nan::New(diffBytes));
    c->Set(Nan::New("size").ToLocalChecked(), Nan::New(mw_util::niceSize(diffBytes).c_str()).ToLocalChecked());
    o->Set(Nan::New("change").ToLocalChecked(), c);

    // before - after will reveal nodes released (memory freed)
    vector<uint64_t> changedIDs;
    setDiff(beforeIDs, afterIDs, changedIDs);
    c->Set(Nan::New("freed_nodes").ToLocalChecked(), Nan::New<v8::Number>(changedIDs.size()));

    // here's where we'll collect all the summary information
    changeset changes;

    // for each of these nodes, let's aggregate the change information
    for (unsigned long i = 0; i < changedIDs.size(); i++) {
        const HeapGraphNode * n = before->GetNodeById(changedIDs[i]);
        manageChange(changes, n, false);
    }

    changedIDs.clear();

    // after - before will reveal nodes added (memory allocated)
    setDiff(afterIDs, beforeIDs, changedIDs);
    c->Set(Nan::New("allocated_nodes").ToLocalChecked(), Nan::New<v8::Number>(changedIDs.size()));

    for (unsigned long i = 0; i < changedIDs.size(); i++) {
        const HeapGraphNode * n = after->GetNodeById(changedIDs[i]);
        manageChange(changes, n, true);
    }

    c->Set(Nan::New("details").ToLocalChecked(), changesetToObject(changes));

    return scope.Escape(o);
}
該函數中構造了兩個對象 b(before)、a(after)用於保存前後兩個快照的詳細信息。用一個 js 對象描述如下:
// b(before) / a(after)
{
nodes: // heap snapshot中對象節點個數
size_bytes: // heap snapshot的對象大小(bytes)
size: // heap snapshot的對象大小(kb、mb)
}
複製代碼
進一步對前後兩次的快照進行分析可以得到 o,o 中的 before、after 屬性就是上面 b、a 兩個對象的引用:
// o
{
before: { // before的堆snapshot
nodes:
size_bytes:
size:
},
after: { // after的堆snapshot
nodes:
size_bytes:
size:
},
change: {
freed_nodes: // gc掉的節點數量
allocated_nodes: // 新增節點數量
details: [ // 按照類型String、Array聚合出來的詳細信息
{
Array : {
what: // 類型
size_bytes: // 字節數bytes
size: // kb、mb
+: // 新增數量
-: // gc數量
}
},
{}
]
}
}
複製代碼
得到兩次 snapshot 對比的結果後將 o 返回出去,在 End 函數中通過 info.GetReturnValue().Set(comparison); 將結果傳遞到 js 層面。
下面來具體說下 compare 函數中的 buildIDSet、setDiff 以及 manageChange 函數的實現。
buildIDSet 的用法:buildIDSet(&beforeIDs, before->GetRoot(), s);,該函數會從堆 snapshot 的根節點出發,遞歸地尋找所有可以訪問到的子節點,加入到集合 seen 中(遇到名爲 HeapDiff 的對象節點時直接返回,不計入統計)。在做 DFS 統計所有可達節點的同時,也會對所有節點的 shallowSize(對象本身佔用的內存,不包括引用的對象所佔內存)進行累加,統計當前堆所佔用的內存大小。其具體實現如下:
// Depth-first walk of the snapshot graph starting at `cur`.
//   seen : receives the id of every node visited
//   s    : running total of the visited nodes' shallow sizes
// Nodes already in `seen`, and object nodes named "HeapDiff", are neither
// counted nor descended into.
static void buildIDSet(set<uint64_t> * seen, const HeapGraphNode* cur, int & s)
{
    Nan::HandleScope scope;

    // Already visited.
    if (seen->count(cur->GetId()) != 0) {
        return;
    }

    const bool isHeapDiffObject =
        cur->GetType() == HeapGraphNode::kObject &&
        handleToStr(cur->GetName()).compare("HeapDiff") == 0;
    if (isHeapDiffObject) {
        return;
    }

    seen->insert(cur->GetId());
    s += cur->GetShallowSize();

    const int childCount = cur->GetChildrenCount();
    for (int idx = 0; idx < childCount; ++idx) {
        buildIDSet(seen, cur->GetChild(idx)->GetToNode(), s);
    }
}
複製代碼
setDiff 函數用法:setDiff(beforeIDs, afterIDs, changedIDs);,主要用來計算兩個集合的差集,具體實現很簡單,這裏直接貼代碼,不再贅述:
typedef std::set<uint64_t> idset;

// Appends to `c` every id that is in `a` but not in `b` (the difference a \ b),
// in ascending order. `c` is not cleared first, so existing elements are kept.
// Both sets are taken by const reference so they are not copied on each call.
//
// why doesn't STL work?
// XXX: improve this algorithm
void setDiff(const idset & a, const idset & b, std::vector<uint64_t> & c)
{
    for (idset::const_iterator i = a.begin(); i != a.end(); ++i) {
        if (b.find(*i) == b.end()) c.push_back(*i);
    }
}
manageChange 函數用法:manageChange(changes, n, false);,其作用在於做數據的聚合:對傳入的節點按照其類型進行歸類,聚合出每種類型的對象創建了多少、銷燬了多少,實現如下:
// Folds one heap node into the per-type change summary.
//   changes : summary keyed by type label
//   node    : the node that was allocated or released
//   added   : true when the node was allocated, false when it was released
// Hidden nodes and node types not listed below are ignored.
static void manageChange(changeset & changes, const HeapGraphNode * node, bool added)
{
    // Pick the label under which this node is aggregated.
    std::string label;
    switch (node->GetType()) {
        case HeapGraphNode::kArray:
            label = "Array";
            break;
        case HeapGraphNode::kString:
            label = "String";
            break;
        case HeapGraphNode::kObject:
            // Objects are grouped by their own name.
            label.append(handleToStr(node->GetName()));
            break;
        case HeapGraphNode::kCode:
            label = "Code";
            break;
        case HeapGraphNode::kClosure:
            label = "Closure";
            break;
        case HeapGraphNode::kRegExp:
            label = "RegExp";
            break;
        case HeapGraphNode::kHeapNumber:
            label = "Number";
            break;
        case HeapGraphNode::kNative:
            label = "Native";
            break;
        case HeapGraphNode::kHidden:
        default:
            return;
    }

    // operator[] creates a value-initialised entry the first time a label is seen.
    change & entry = changes[label];

    // Released nodes count negatively towards the size delta.
    entry.size += node->GetShallowSize() * (added ? 1 : -1);
    if (added) {
        entry.added++;
    } else {
        entry.released++;
    }
}
複製代碼
upon_gc 和 gc 實現
這兩個方法在 init 函數中聲明如下:
// Export memwatch::upon_gc and memwatch::trigger_gc on `target` as "upon_gc" and "gc".
Nan::SetMethod(target, "upon_gc", memwatch::upon_gc);
Nan::SetMethod(target, "gc", memwatch::trigger_gc);
複製代碼
先看 gc 方法的實現,實際上對應 memwatch::trigger_gc,實現如下:
// JS function "gc": issues an idle notification followed by a low-memory
// notification, then returns undefined.
// Optional first argument: the deadline in milliseconds passed to the idle
// notification (500 when the argument is absent or not a number).
NAN_METHOD(memwatch::trigger_gc) {
    Nan::HandleScope scope;

    const bool hasDeadline = info.Length() >= 1 && info[0]->IsNumber();
    const int deadline_in_ms = hasDeadline
        ? static_cast<int>(info[0]->Int32Value())
        : 500;

    Nan::IdleNotification(deadline_in_ms);
    Nan::LowMemoryNotification();

    info.GetReturnValue().Set(Nan::Undefined());
}
複製代碼
通過 Nan::IdleNotification 和 Nan::LowMemoryNotification 觸發 v8 的 gc 功能。
再來看 upon_gc 方法,該方法實際上會綁定一個回調函數,每當 v8 完成一次 mark-sweep-compact 類型的 gc 之後(見下文的 after_gc 鉤子),就會觸發該函數:
// JS function "upon_gc": when the first argument is a function, stores it
// (wrapped in an UponGCCallback) in uponGCCallback. Always returns undefined.
NAN_METHOD(memwatch::upon_gc) {
    Nan::HandleScope scope;

    const bool gotFunction = info.Length() >= 1 && info[0]->IsFunction();
    if (gotFunction) {
        // NOTE(review): a previously stored UponGCCallback is overwritten here
        // without being deleted — confirm whether that is intended.
        uponGCCallback = new UponGCCallback(info[0].As<v8::Function>());
    }

    info.GetReturnValue().Set(Nan::Undefined());
}
複製代碼
其中info[0]就是用戶傳入的回調函數。調用new UponGCCallback的時候,其對應的構造函數內部會執行:
// Constructs the Nan::AsyncResource base with the name "memwatch:upon_gc" and
// stores the JS function callback_ in the `callback` member via Reset().
UponGCCallback(v8::Local<v8::Function> callback_) : Nan::AsyncResource("memwatch:upon_gc") {
callback.Reset(callback_);
}
複製代碼
把用戶傳入的 callback_ 函數設置到 UponGCCallback 類的成員變量 callback 上。upon_gc 回調的觸發與 gc 的鉤子有關,詳細看下面對 gc 鉤子的分析。
gc 鉤子的掛載如下:
// Register memwatch::before_gc as a GC prologue callback and memwatch::after_gc as a GC epilogue callback.
Nan::AddGCPrologueCallback(memwatch::before_gc);
Nan::AddGCEpilogueCallback(memwatch::after_gc);
複製代碼
先來看memwatch::before_gc
函數的實現,內部給gc開始記錄了時間:
// GC prologue callback: stores the current uv_hrtime() reading in
// currentGCStartTime so that after_gc can compute how long the collection took.
NAN_GC_CALLBACK(memwatch::before_gc) {
currentGCStartTime = uv_hrtime();
}
複製代碼
再來看memwatch::after_gc
函數的實現,內部會在gc後記錄gc的結果到GCStats結構體中:
// Running GC statistics: one event counter and one accumulated duration per GC kind.
struct GCStats {
// counts of different types of gc events
size_t gcScavengeCount; // number of scavenge GCs
uint64_t gcScavengeTime; // accumulated time of scavenge GCs (uv_hrtime() differences, see after_gc)
size_t gcMarkSweepCompactCount; // number of mark-sweep-compact GCs
uint64_t gcMarkSweepCompactTime; // accumulated time of mark-sweep-compact GCs (uv_hrtime() differences, see after_gc)
size_t gcIncrementalMarkingCount; // number of incremental-marking GC events
uint64_t gcIncrementalMarkingTime; // time spent in incremental marking
size_t gcProcessWeakCallbacksCount; // number of weak-callback-processing GC events
uint64_t gcProcessWeakCallbacksTime; // time spent processing weak callbacks
};
複製代碼
對 gc 請求進行統計後,通過 v8 的 api 獲取堆的使用狀況,最終將結果保存到 baton 中。baton 內部維護了一個 uv_work_t 類型的變量 req,req 的 data 字段指向 baton 對象自身。
// GC epilogue callback: updates s_stats for the collection that just finished
// and, for mark-sweep-compact collections, queues a Baton with heap statistics
// on the default libuv loop so AsyncMemwatchAfter can report it.
NAN_GC_CALLBACK(memwatch::after_gc) {
    // Nothing is recorded while HeapDiff::InProgress() reports true
    // (presumably while a HeapDiff snapshot is being taken — confirm).
    if (heapdiff::HeapDiff::InProgress()) return;

    const uint64_t gcEnd = uv_hrtime();
    const uint64_t gcTime = gcEnd - currentGCStartTime;

    // Scavenges are only counted; no event is queued for them.
    if (type == kGCTypeScavenge) {
        s_stats.gcScavengeCount++;
        s_stats.gcScavengeTime += gcTime;
        return;
    }

    // Every other GC type except mark-sweep-compact is ignored.
    if (type != kGCTypeMarkSweepCompact) {
        return;
    }

    s_stats.gcMarkSweepCompactCount++;
    s_stats.gcMarkSweepCompactTime += gcTime;

    Nan::HandleScope scope;

    // Deleted by AsyncMemwatchAfter once the event has been delivered.
    Baton * baton = new Baton;

    v8::HeapStatistics hs;
    Nan::GetHeapStatistics(&hs);

    // Wall-clock timestamp of this GC in microseconds.
    timeval tv;
    gettimeofday(&tv, NULL);
    baton->gc_ts = (tv.tv_sec * 1000000) + tv.tv_usec;

    baton->total_heap_size = hs.total_heap_size();
    baton->total_heap_size_executable = hs.total_heap_size_executable();

    // req.data points back at the baton so the completion callback can recover it.
    baton->req.data = baton;

    // NOTE(review): AsyncMemwatchAfter is declared with a single uv_work_t*
    // parameter and is cast to uv_after_work_cb here — confirm the signatures agree.
    uv_queue_work(uv_default_loop(), &(baton->req),
        noop_work_func, reinterpret_cast<uv_after_work_cb>(AsyncMemwatchAfter));
}
複製代碼
在前面工作完成的基礎上,將結果丟到 libuv 的 loop 中,等到合適的時機觸發回調函數。在回調函數中可以拿到 req 對象,對 req.data 做強制類型轉換就能得到 baton 對象。在 loop 的回調函數中,將 baton 中封裝的數據依次取出來,保存到 stats 對象中,並調用 uponGCCallback 的 Call 方法,傳入字符串字面量 "stats" 和 stats 對象。
// Completion callback for the work item queued by after_gc.
// Recovers the Baton from request->data; if a callback is registered in
// uponGCCallback, builds a `stats` object from the baton and invokes the
// callback with ("stats", stats). The baton is deleted in every case.
static void AsyncMemwatchAfter(uv_work_t* request) {
    Nan::HandleScope scope;

    Baton * b = static_cast<Baton *>(request->data);

    // if there are any listeners, it's time to emit!
    if (uponGCCallback) {
        Local<Value> argv[2];

        Local<Object> stats = Nan::New<v8::Object>();
        stats->Set(Nan::New("gc_ts").ToLocalChecked(), javascriptNumber(b->gc_ts));
        stats->Set(Nan::New("gcProcessWeakCallbacksCount").ToLocalChecked(), javascriptNumberSize(b->stats.gcProcessWeakCallbacksCount));
        stats->Set(Nan::New("gcProcessWeakCallbacksTime").ToLocalChecked(), javascriptNumber(b->stats.gcProcessWeakCallbacksTime));
        stats->Set(Nan::New("peak_malloced_memory").ToLocalChecked(), javascriptNumberSize(b->peak_malloced_memory));
        stats->Set(Nan::New("gc_time").ToLocalChecked(), javascriptNumber(b->gc_time));

        // the type of event to emit
        argv[0] = Nan::New("stats").ToLocalChecked();
        argv[1] = stats;
        uponGCCallback->Call(2, argv);
    }

    delete b;
}
最後在 Call 函數的內部調用 js 傳入的 callback_ 函數,並將字符串字面量 "stats" 和 stats 對象傳遞到 js 層面,供上層用戶使用。
void Call(int argc, Local<v8::Value> argv[]) {
v8::Isolate *isolate = v8::Isolate::GetCurrent();
runInAsyncScope(isolate->GetCurrentContext()->Global(), Nan::New(callback), argc, argv);
}
複製代碼