原文請看個人博客:從源碼角度看traces.txt是如何生成的java
traces.txt 位於安卓系統下/data/anr目錄下,當系統中有應用出現ANR時,framework會在彈出Dialog的同時dump出當前各線程的堆棧狀況,方便開發者分析出ANR的root cause。android
ANR是Application Not Responsing 的簡稱,簡而言之,就是安卓系統內置提示用戶應用界面沒有反應的機制,是用來避免應用界面一直卡頓,增長系統用戶友好度的一種方式。c++
形成ANR的緣由不少,都是由於在主線程執行任務過久阻塞了界面更新致使的,主要有如下幾類:緩存
可以形成ANR的前提是任務是在主線程上執行的,執行什麼樣的任務主要有如下幾點:bash
那麼回到traces.txt文件,它到底包含了什麼信息能夠幫助開發者找到ANR問題的根源呢。網絡
在這篇文章,我先從一份traces.txt的日誌實例開始解析,而後再經過追蹤源碼來解釋traces.txt是如何生成的。app
我是基於Android 5.0原生代碼進行分析的,最新的一些代碼邏輯放到了AppErrors類中了,有些小差異,可是大致的邏輯和步驟沒有變化,不會影響閱讀。ide
// 觸發ANR的應用pid及觸發時間
----- pid 30307 at 2017-09-03 14:51:14 -----
Cmd line: com.example.androidyue.bitmapdemo
JNI: CheckJNI is off; workarounds are off; pins=0; globals=272
DALVIK THREADS:
(mutexes: tll=0 tsl=0 tscl=0 ghl=0)
// JAVA 態線程狀態
// 線程名稱; 優先級; 線程id; 線程狀態
"main" prio=5 tid=1 TIMED_WAIT
// 線程組; 線程suspend計數; 線程debug suspend計數; 同輩線程對象; 當前線程對象
| group="main" sCount=1 dsCount=0 obj=0x416eaf18 self=0x416d8650
// 線程id; 進程優先級; 調度者優先級; 調度者組名; 線程緩存句柄
| sysTid=30307 nice=0 sched=0/0 cgrp=apps handle=1074565528
// native線程狀態; 調度者狀態; 執行用戶代碼的總時間; 執行系統代碼的總時間; 任務cpu使用數量
| state=S schedstat=( 0 0 0 ) utm=5 stm=4 core=3
// 線程調用棧
at java.lang.VMThread.sleep(Native Method)
at java.lang.Thread.sleep(Thread.java:1044)
at java.lang.Thread.sleep(Thread.java:1026)
at com.example.androidyue.bitmapdemo.MainActivity$1.run(MainActivity.java:27)
at android.app.Activity.runOnUiThread(Activity.java:4794)
at com.example.androidyue.bitmapdemo.MainActivity.onResume(MainActivity.java:33)
at android.app.Instrumentation.callActivityOnResume(Instrumentation.java:1282)
at android.app.Activity.performResume(Activity.java:5405)
// Native態線程狀態
"Binder_5" prio=5 tid=32 NATIVE
| group="main" sCount=1 dsCount=0 obj=0x41dd6570 self=0x678fa458
| sysTid=29473 nice=0 sched=0/0 cgrp=apps handle=1732666432
| state=S schedstat=( 578000 10600000 3 ) utm=0 stm=0 core=0
#00 pc 00016fe4 /system/lib/libc.so (__ioctl+8)
#01 pc 0002a97d /system/lib/libc.so (ioctl+16)
#02 pc 00016ba1 /system/lib/libbinder.so (android::IPCThreadState::talkWithDriver(bool)+132)
#03 pc 00017363 /system/lib/libbinder.so (android::IPCThreadState::joinThreadPool(bool)+154)
#04 pc 0001b15d /system/lib/libbinder.so
#05 pc 00011267 /system/lib/libutils.so (android::Thread::_threadLoop(void*)+114)
#06 pc 0004679f /system/lib/libandroid_runtime.so (android::AndroidRuntime::javaThreadShell(void*)+66)
#07 pc 00010dcd /system/lib/libutils.so
#08 pc 0000e3d8 /system/lib/libc.so (__thread_entry+72)
#09 pc 0000dac4 /system/lib/libc.so (pthread_create+160)
at dalvik.system.NativeStart.run(Native Method)複製代碼
關鍵詞 | 解釋 | |
---|---|---|
prio | 優先級 | |
tid | 線程id | |
group | 組名 | |
sCount | 掛起次數 | |
dsCount | 調試下 掛起計數 | |
obj | 同輩線程對象 | |
self | 當前線程對象 | |
sysTid | 線程id | |
nice | 進程優先級 | |
sched | 調度者優先級 | |
cgrp | 調度者組名 | |
handle | 線程緩存句柄 | |
state | native線程狀態 | |
schedstat | 調度者狀態 | |
utm | 用戶態CPU時間 | |
stm | 內核態CPU時間 | |
core | 運行所在覈 |
JAVA狀態 | CPP狀態 | 解釋 |
---|---|---|
TERMINATED | ZOMBIE | 線程死亡 |
RUNNABLE | RUNNING/RUNNABLE | 可運行或正在運行 |
TIMED_WAITING | TIMED_WAIT | 執行了帶有超時參數的wait, sleep或join函數 |
BLOCKED | MONITOR | 線程阻塞,等待獲取對象鎖 |
WAITING | WAIT | 執行了無超時參數的wait |
NEW | INITIALIZING | 新建,正在初始化中 |
NEW | STARTING | 新建,正在啓動 |
RUNNABLE | NATIVE | 正在執行JNI本地函數 |
WAITING | VMWAIT | 正在等待VM資源 |
RUNNABLE | SUSPENDED | 線程暫停,一般因爲GC或者DEBUG被掛起 |
我將從代碼的最底層開始分析,再慢慢到觸發點的頂端,先從cpp開始函數
thread.ccoop
void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) {
// 組名
std::string group_name;
// 優先級
int priority;
// 是不是daemon線程
bool is_daemon = false;
// 拿到當前線程對象指針
Thread* self = Thread::Current();
if (thread != nullptr) {
...
}
if (gAborting == 0 && self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
ScopedObjectAccessUnchecked soa(self);
priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)
->GetInt(thread->tlsPtr_.opeer);
is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)
->GetBoolean(thread->tlsPtr_.opeer);
...
if (thread_group != nullptr) {
...
group_name = (group_name_string != nullptr) ? group_name_string->ToModifiedUtf8() : "<null>";
}
} else {
priority = GetNativePriority();
}
std::string scheduler_group_name(GetSchedulerGroupName(tid));
if (scheduler_group_name.empty()) {
scheduler_group_name = "default";
}
// 這裏開始輸出線程狀態
if (thread != nullptr) {
// 線程名輸出
os << '"' << *thread->tlsPtr_.name << '"';
if (is_daemon) {
os << " daemon";
}
// 優先級; ID; 狀態
os << " prio=" << priority
<< " tid=" << thread->GetThreadId()
<< " " << thread->GetState();
if (thread->IsStillStarting()) {
os << " (still starting up)";
}
os << "\n";
} else {
os << '"' << ::art::GetThreadName(tid) << '"'
<< " prio=" << priority
<< " (not attached)\n";
}
// 線程組; 線程suspend計數; 線程debug suspend計數; 同輩線程對象; 當前線程對象
if (thread != nullptr) {
MutexLock mu(self, *Locks::thread_suspend_count_lock_);
os << " | group=\"" << group_name << "\""
<< " sCount=" << thread->tls32_.suspend_count
<< " dsCount=" << thread->tls32_.debug_suspend_count
<< " obj=" << reinterpret_cast<void*>(thread->tlsPtr_.opeer)
<< " self=" << reinterpret_cast<const void*>(thread) << "\n";
}
// 線程id; 進程優先級; 調度者組名
os << " | sysTid=" << tid
<< " nice=" << getpriority(PRIO_PROCESS, tid)
<< " cgrp=" << scheduler_group_name;
// 調度者優先級; 緩存句柄
if (thread != nullptr) {
int policy;
sched_param sp;
CHECK_PTHREAD_CALL(pthread_getschedparam, (thread->tlsPtr_.pthread_self, &policy, &sp),
__FUNCTION__);
os << " sched=" << policy << "/" << sp.sched_priority
<< " handle=" << reinterpret_cast<void*>(thread->tlsPtr_.pthread_self);
}
os << "\n";
// Grab the scheduler stats for this thread.
std::string scheduler_stats;
if (ReadFileToString(StringPrintf("/proc/self/task/%d/schedstat", tid), &scheduler_stats)) {
scheduler_stats.resize(scheduler_stats.size() - 1); // Lose the trailing '\n'.
} else {
scheduler_stats = "0 0 0";
}
char native_thread_state = '?';
int utime = 0;
int stime = 0;
int task_cpu = 0;
GetTaskStats(tid, &native_thread_state, &utime, &stime, &task_cpu);
// native線程狀態; 調度者狀態; 執行用戶代碼的總時間; 執行系統代碼的總時間; 任務cpu使用數量
os << " | state=" << native_thread_state
<< " schedstat=( " << scheduler_stats << " )"
<< " utm=" << utime
<< " stm=" << stime
<< " core=" << task_cpu
<< " HZ=" << sysconf(_SC_CLK_TCK) << "\n";
if (thread != nullptr) {
// dump 線程當前棧
os << " | stack=" << reinterpret_cast<void*>(thread->tlsPtr_.stack_begin) << "-"
<< reinterpret_cast<void*>(thread->tlsPtr_.stack_end) << " stackSize="
<< PrettySize(thread->tlsPtr_.stack_size) << "\n";
// dump 線程鎖信息
os << " | held mutexes=";
for (size_t i = 0; i < kLockLevelCount; ++i) {
if (i != kMonitorLock) {
BaseMutex* mutex = thread->GetHeldMutex(static_cast<LockLevel>(i));
if (mutex != nullptr) {
os << " \"" << mutex->GetName() << "\"";
if (mutex->IsReaderWriterMutex()) {
ReaderWriterMutex* rw_mutex = down_cast<ReaderWriterMutex*>(mutex);
if (rw_mutex->GetExclusiveOwnerTid() == static_cast<uint64_t>(tid)) {
os << "(exclusive held)";
} else {
os << "(shared held)";
}
}
}
}
}
os << "\n";
}
}複製代碼
utils.cc
void GetTaskStats(pid_t tid, char* state, int* utime, int* stime, int* task_cpu) {
*utime = *stime = *task_cpu = 0;
std::string stats;
// 從stat節點獲取數據
if (!ReadFileToString(StringPrintf("/proc/self/task/%d/stat", tid), &stats)) {
return;
}
// Skip the command, which may contain spaces.
stats = stats.substr(stats.find(')') + 2);
// Extract the three fields we care about.
std::vector<std::string> fields;
Split(stats, ' ', &fields);
*state = fields[0][0];
*utime = strtoull(fields[11].c_str(), nullptr, 10);
*stime = strtoull(fields[12].c_str(), nullptr, 10);
*task_cpu = strtoull(fields[36].c_str(), nullptr, 10);
}複製代碼
thread_list.cc
// 讀取該線程中的全部狀態信息:JAVA態,Native態,Kernel態
static void DumpUnattachedThread(std::ostream& os, pid_t tid) NO_THREAD_SAFETY_ANALYSIS {
// [接上]
Thread::DumpState(os, nullptr, tid);
DumpKernelStack(os, tid, " kernel: ", false);
if (false) {
DumpNativeStack(os, tid, " native: ");
}
os << "\n";
}
void ThreadList::DumpUnattachedThreads(std::ostream& os) {
DIR* d = opendir("/proc/self/task");
if (!d) {
return;
}
Thread* self = Thread::Current();
dirent* e;
// 獲取當前進程中全部線程,dump這些線程的信息
while ((e = readdir(d)) != nullptr) {
char* end;
pid_t tid = strtol(e->d_name, &end, 10);
if (!*end) {
bool contains;
{
MutexLock mu(self, *Locks::thread_list_lock_);
contains = Contains(tid);
}
if (!contains) {
// [接上]
DumpUnattachedThread(os, tid);
}
}
}
closedir(d);
}
bool ThreadList::Contains(Thread* thread) {
return find(list_.begin(), list_.end(), thread) != list_.end();
}複製代碼
thread_list.cc
void ThreadList::DumpForSigQuit(std::ostream& os) {
...
DumpUnattachedThreads(os);
}複製代碼
runtime.cc
void Runtime::DumpForSigQuit(std::ostream& os) {
GetClassLinker()->DumpForSigQuit(os);
GetInternTable()->DumpForSigQuit(os);
GetJavaVM()->DumpForSigQuit(os);
GetHeap()->DumpForSigQuit(os);
TrackedAllocators::Dump(os);
os << "\n";
// [接上]
// dump 線程列表
thread_list_->DumpForSigQuit(os);
BaseMutex::DumpAll(os);
}複製代碼
singal_catcher.cc
void SignalCatcher::HandleSigQuit() {
Runtime* runtime = Runtime::Current();
std::ostringstream os;
// 輸出進程id以及當前dump時間
os << "\n"
<< "----- pid " << getpid() << " at " << GetIsoDate() << " -----\n";
DumpCmdLine(os);
// 輸出系統信息
std::string fingerprint = runtime->GetFingerprint();
os << "Build fingerprint: '" << (fingerprint.empty() ? "unknown" : fingerprint) << "'\n";
os << "ABI: '" << GetInstructionSetString(runtime->GetInstructionSet()) << "'\n";
os << "Build type: " << (kIsDebugBuild ? "debug" : "optimized") << "\n";
// [接上]
runtime->DumpForSigQuit(os);
...
os << "----- end " << getpid() << " -----\n";
Output(os.str());
}複製代碼
void* SignalCatcher::Run(void* arg) {
...
// Set up mask with signals we want to handle.
SignalSet signals;
signals.Add(SIGQUIT);
signals.Add(SIGUSR1);
while (true) {
// 等待信號
int signal_number = signal_catcher->WaitForSignal(self, signals);
...
switch (signal_number) {
case SIGQUIT:
// [接上]
signal_catcher->HandleSigQuit();
break;
case SIGUSR1:
signal_catcher->HandleSigUsr1();
break;
default:
LOG(ERROR) << "Unexpected signal %d" << signal_number;
break;
}
}
}
int SignalCatcher::WaitForSignal(Thread* self, SignalSet& signals) {
...
int signal_number = signals.Wait();
...
return signal_number;
}複製代碼
可見,android framework經過發送信號3來通知虛擬機來進行dump到traces.txt文件中的
ActivityManagerService.java
private static void dumpStackTraces(String tracesPath, ArrayList<Integer> firstPids, ProcessCpuTracker processCpuTracker, SparseArray<Boolean> lastPids, String[] nativeProcs) {
// 當trace寫完後,監聽並觸發onEvent方法
FileObserver observer = new FileObserver(tracesPath, FileObserver.CLOSE_WRITE) {
@Override
public synchronized void onEvent(int event, String path) {
notify();
}
};
try {
// 開始監聽
observer.startWatching();
// 首先收集firstPids鏈表裏的進程信息
if (firstPids != null) {
try {
int num = firstPids.size();
for (int i = 0; i < num; i++) {
synchronized (observer) {
// 發送信號3來觸發
Process.sendSignal(firstPids.get(i), Process.SIGNAL_QUIT);
observer.wait(200); // Wait for write-close, give up after 200msec
}
}
} catch (InterruptedException e) {
Slog.wtf(TAG, e);
}
}
// 收集native進程信息
...
// 最後收集CPU信息
...
} finally {
observer.stopWatching();
}
}複製代碼
public static File dumpStackTraces(boolean clearTraces, ArrayList<Integer> firstPids, ProcessCpuTracker processCpuTracker, SparseArray<Boolean> lastPids, String[] nativeProcs) {
// 獲取trace文件路徑
String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
...
File tracesFile = new File(tracesPath);
try {
...
FileUtils.setPermissions(tracesDir.getPath(), 0775, -1, -1); // drwxrwxr-x
if (clearTraces && tracesFile.exists()) tracesFile.delete();
tracesFile.createNewFile();
FileUtils.setPermissions(tracesFile.getPath(), 0666, -1, -1); // -rw-rw-rw-
} catch (IOException e) {
Slog.w(TAG, "Unable to prepare ANR traces file: " + tracesPath, e);
return null;
}
// [接上]
dumpStackTraces(tracesPath, firstPids, processCpuTracker, lastPids, nativeProcs);
return tracesFile;
}複製代碼
ActivityManagerService.java
final void appNotResponding(ProcessRecord app, ActivityRecord activity, ActivityRecord parent, boolean aboveSystem, final String annotation) {
ArrayList<Integer> firstPids = new ArrayList<Integer>(5);
SparseArray<Boolean> lastPids = new SparseArray<Boolean>(20);
...
final ProcessCpuTracker processCpuTracker = new ProcessCpuTracker(true);
// [接上]
File tracesFile = dumpStackTraces(true, firstPids, processCpuTracker, lastPids,
NATIVE_STACKS_OF_INTEREST);
...
Slog.e(TAG, info.toString());
if (tracesFile == null) {
// There is no trace file, so dump (only) the alleged culprit's threads to the log
Process.sendSignal(app.pid, Process.SIGNAL_QUIT);
}
addErrorToDropBox("anr", app, app.processName, activity, parent, annotation,
cpuInfo, tracesFile, null);
...
}複製代碼
觸發ANR的方式上面我已經列舉過了,這裏我以觸發Broadcast Timeout時進行分析
BroadcastQueue.java
private final class AppNotResponding implements Runnable {
private final ProcessRecord mApp;
private final String mAnnotation;
public AppNotResponding(ProcessRecord app, String annotation) {
mApp = app;
mAnnotation = annotation;
}
@Override
public void run() {
// [接上]
mService.appNotResponding(mApp, null, null, false, mAnnotation);
}
}複製代碼
BroadcastQueue.java
final void broadcastTimeoutLocked(boolean fromMsg) {
...
String anrMessage = null;
...
if (app != null) {
anrMessage = "Broadcast of " + r.intent.toString();
}
...
if (anrMessage != null) {
// [接上]
mHandler.post(new AppNotResponding(app, anrMessage));
}
}複製代碼
private final class BroadcastHandler extends Handler {
public BroadcastHandler(Looper looper) {
super(looper, null, true);
}
@Override
public void handleMessage(Message msg) {
switch (msg.what) {
...
case BROADCAST_TIMEOUT_MSG: {
synchronized (mService) {
// [接上]
broadcastTimeoutLocked(true);
}
} break;
...
}
}
};複製代碼
BroadcastQueue.java
final void setBroadcastTimeoutLocked(long timeoutTime) {
if (! mPendingBroadcastTimeoutMessage) {
Message msg = mHandler.obtainMessage(BROADCAST_TIMEOUT_MSG, this);
mHandler.sendMessageAtTime(msg, timeoutTime);
mPendingBroadcastTimeoutMessage = true;
}
}
final void processNextBroadcast(boolean fromMsg) {
if (! mPendingBroadcastTimeoutMessage) {
long timeoutTime = r.receiverTime + mTimeoutPeriod;
// [接上]
setBroadcastTimeoutLocked(timeoutTime);
}
}複製代碼
下面總結一下觸發廣播ANR後輸出traces.txt的過程: