本文爲項目開發總結的原創文檔。
本項目添加了一個 watchdog 守護進程,用來監控環境的三大進程 mozart、bitbox、mplayer;任何一個進程出現故障,整個環境就會重啓。
進程是否存在的判斷,見下文詳細代碼中的 processExists。
實現難點突破:
看門狗實際上就是一個定時器,其硬件內部維護了一個計數的寄存器。每當時鐘信號到來時,計數寄存器減掉 1。如果減到 0,則重啓系統。
如果在減到 0 之前,系統又把計數寄存器設置爲一個較大的值,則系統永遠不會重啓。
watchdog 的基本實現原理是:
用戶空間程序打開 /dev/mem 設備(俗稱「開門放狗」),
就會導致在內核中啓動一個定時器(本項目 wdt_start_count 的入參是 20000ms,即 20s)。此後,用戶空間程序需要保證在 20 秒之內向這個設備寫入數據(俗稱「定期喂狗」),每次寫操作都會重新設定定時器(本項目是每 sleep 10s 重新設定一次)。如果用戶空間程序在 20 秒之內沒有寫操作,定時器到期會導致一次系統 Reboot 操作(「狗咬人了」)。
watchdog.c 內容如下:
/*
 * watchdog.c - user-space feeder for the SoC hardware watchdog.
 *
 * The TCU/WDT registers are reached by mmap()ing /dev/mem. While the
 * "mozart", "bitbox" and "mplayer" processes all show up in `ps` output, the
 * watchdog is re-armed with a 20 s timeout every 10 s. As soon as one of them
 * is missing the program reports it and exits, so a previously armed watchdog
 * is no longer fed.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* exposes daemon(), popen() and backtrace() in strict -std=cNN builds */
#endif

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <stdbool.h>
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
#include <time.h>
#include <sys/types.h>
#include <pwd.h>
#include <sys/stat.h>
#include <linux/input.h>
#include <fcntl.h>
#include <execinfo.h>
#include <sys/mman.h>

#define WDT_IOBASE (0x10002000)
#define MAP_SIZE 0xFF
#define JZ_EXTAL_RTC 32768 /* RTC extal freq: 32.768 KHz */
#define TCU_IOBASE 0x10002000
#define TCU_TSCR (0x3C) /* Timer Stop Clear Register */
#define WDT_TCSR (0x0c) /* rw, 32, 0x???????? */
#define WDT_TCER (0x04) /* rw, 32, 0x???????? */
#define WDT_TDR (0x00) /* rw, 32, 0x???????? */
#define WDT_TCNT (0x08) /* rw, 32, 0x???????? */
#define TCU_TSSR (0x2C) /* Timer Stop Set Register */

/* The TCU_* offsets below are applied to the mapping made at WDT_IOBASE. */
#if (WDT_IOBASE) != (TCU_IOBASE)
#error "TCU and WDT registers are expected to share one base address"
#endif

#define WDT_MAX_TICKS       65535   /* largest compare value the code ever programs */
#define WDT_TIMEOUT_MS      20000   /* watchdog timeout used while feeding */
#define WDT_FEED_INTERVAL_S 10      /* seconds between two feeds */

/* Set to 1 to dump a backtrace instead of stopping the watchdog on a signal. */
#ifndef WATCHDOG_CRASH_DUMP
#define WATCHDOG_CRASH_DUMP 0
#endif

/*
 * Map the watchdog register window from /dev/mem.
 *
 * Returns the mapped base, or NULL (after printing a message) when /dev/mem
 * cannot be opened or mapped. The descriptor is closed right away; the mapping
 * stays valid until wdt_unmap_regs().
 */
static volatile unsigned char *wdt_map_regs(void)
{
    void *map_base;
    int saved_errno;
    int dev_fd = open("/dev/mem", O_RDWR | O_NDELAY);

    if (dev_fd < 0) {
        printf("open(/dev/mem) failed.");
        return NULL;
    }

    map_base = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                    dev_fd, WDT_IOBASE);
    saved_errno = errno; /* close() may overwrite errno */
    close(dev_fd);

    if (map_base == MAP_FAILED) {
        printf("mmap(/dev/mem) failed: %s\n", strerror(saved_errno));
        return NULL;
    }
    return map_base;
}

/* Release a mapping obtained from wdt_map_regs(). */
static void wdt_unmap_regs(volatile unsigned char *map_base)
{
    munmap((void *)map_base, MAP_SIZE);
}

/*
 * Store a 32-bit value into the register at `offset` from `map_base`.
 * The volatile access keeps the compiler from merging or dropping stores,
 * e.g. the TCER = 0 immediately followed by TCER = 1 sequence.
 */
static void wdt_write_reg(volatile unsigned char *map_base, unsigned int offset,
                          uint32_t value)
{
    *(volatile uint32_t *)(map_base + offset) = value;
}

/*
 * Arm (or re-arm) the hardware watchdog.
 *
 * msecs: timeout in milliseconds. It is converted to ticks of the RTC clock
 *        divided by 64 and clamped to [0, WDT_MAX_TICKS]; negative values are
 *        treated as 0.
 *
 * This is the user-space equivalent of the kernel-side sequence
 *   outl(1 << 16, TCU_IOBASE + TCU_TSCR);
 *   outl(0, WDT_IOBASE + WDT_TCNT);
 *   outl(time, WDT_IOBASE + WDT_TDR);
 *   outl((3 << 3 | 1 << 1), WDT_IOBASE + WDT_TCSR);
 *   outl(0, WDT_IOBASE + WDT_TCER);
 *   outl(1, WDT_IOBASE + WDT_TCER);
 * The key difference is that the physical register address must first be
 * translated with mmap() before user space can touch it.
 *
 * On open/mmap failure a message is printed and the hardware is left untouched.
 */
static void wdt_start_count(int msecs)
{
    volatile unsigned char *map_base = wdt_map_regs();
    long long ticks;

    if (map_base == NULL)
        return;

    /* Wide arithmetic so a large msecs cannot overflow before the clamp. */
    ticks = (long long)(JZ_EXTAL_RTC / 64) * msecs / 1000;
    if (ticks > WDT_MAX_TICKS)
        ticks = WDT_MAX_TICKS;
    if (ticks < 0)
        ticks = 0;

    wdt_write_reg(map_base, TCU_TSCR, 1 << 16);          /* presumably bit 16 = WDT clock un-stop; confirm in SoC manual */
    wdt_write_reg(map_base, WDT_TCNT, 0);                /* counter */
    wdt_write_reg(map_base, WDT_TDR, (uint32_t)ticks);   /* data (compare value) */
    wdt_write_reg(map_base, WDT_TCSR, (3 << 3 | 1 << 1)); /* presumably /64 prescaler + RTC clock, matching the /64 above */
    wdt_write_reg(map_base, WDT_TCER, 0);
    wdt_write_reg(map_base, WDT_TCER, 1);

    wdt_unmap_regs(map_base); /* each feed maps afresh; unmap so feeds do not leak mappings */
}

/*
 * Stop the hardware watchdog.
 *
 * User-space equivalent of the kernel-side sequence
 *   outl(1 << 16, TCU_IOBASE + TCU_TSCR);
 *   outl(0, WDT_IOBASE + WDT_TCNT);
 *   outl(65535, WDT_IOBASE + WDT_TDR);
 *   outl(1 << 16, TCU_IOBASE + TCU_TSSR);
 *
 * On open/mmap failure a message is printed and the hardware is left untouched.
 */
static void wdt_stop_count(void)
{
    volatile unsigned char *map_base = wdt_map_regs();

    if (map_base == NULL)
        return;

    printf("\033[1;33mwdt_stop_count begin~~~. map_base = %p\n\033[m", (void *)map_base);

    wdt_write_reg(map_base, TCU_TSCR, 1 << 16);
    wdt_write_reg(map_base, WDT_TCNT, 0);              /* counter */
    wdt_write_reg(map_base, WDT_TDR, WDT_MAX_TICKS);
    wdt_write_reg(map_base, TCU_TSSR, 1 << 16);

    wdt_unmap_regs(map_base);
    printf("wdt_stop_count end.\n");
}

/*
 * Re-arm the watchdog with WDT_TIMEOUT_MS every WDT_FEED_INTERVAL_S seconds,
 * forever. Never returns.
 *
 * NOTE(review): the name and the "Restarting after 4 ms" message suggest this
 * was meant to force a reset, but as written it keeps feeding the watchdog.
 * Behaviour is preserved; only the unreachable trailing loop was removed.
 */
void jz_wdt_restart(void)
{
    printf("Restarting after 4 ms\n");
    for (;;) {
        wdt_start_count(WDT_TIMEOUT_MS);
        sleep(WDT_FEED_INTERVAL_S);
    }
}

/*
 * Check whether a process exists.
 *
 * Runs `ps | grep <name> | grep -v grep | wc -l` and returns true when the
 * reported count is at least 1.
 *
 * process_name: pattern handed to grep. It is pasted into a shell command
 *               unquoted, so callers must only pass trusted names.
 *
 * Returns false for a NULL/empty name, when the command does not fit the
 * buffer, when popen() fails, or when no count can be read.
 */
bool processExists(char *process_name)
{
    enum { RE_BUF_SIZE = 32, CMD_BUF_SIZE = 128 };
    char rebuff[RE_BUF_SIZE];
    char ps[CMD_BUF_SIZE];
    FILE *ptr;
    long count = 0;
    int len;

    if (process_name == NULL || process_name[0] == '\0')
        return false;

    len = snprintf(ps, sizeof(ps), "ps | grep %s |grep -v grep| wc -l", process_name);
    if (len < 0 || (size_t)len >= sizeof(ps)) {
        printf("process name too long: %s\n", process_name);
        return false;
    }

    ptr = popen(ps, "r");
    if (ptr == NULL) {
        printf("Current process %s is not Exist!!!!\n", process_name);
        return false;
    }

    /* Only parse the buffer if a line was actually read. */
    if (fgets(rebuff, sizeof(rebuff), ptr) != NULL)
        count = strtol(rebuff, NULL, 10);

    pclose(ptr);
    return count >= 1;
}

#if WATCHDOG_CRASH_DUMP
/* Signal names indexed by signal number; gaps are NULL. */
static const char *const signal_str[] = {
    [1] = "SIGHUP",       [2] = "SIGINT",       [3] = "SIGQUIT",      [4] = "SIGILL",
    [5] = "SIGTRAP",      [6] = "SIGABRT",      [7] = "SIGBUS",       [8] = "SIGFPE",
    [9] = "SIGKILL",      [10] = "SIGUSR1",     [11] = "SIGSEGV",     [12] = "SIGUSR2",
    [13] = "SIGPIPE",     [14] = "SIGALRM",     [15] = "SIGTERM",     [16] = "SIGSTKFLT",
    [17] = "SIGCHLD",     [18] = "SIGCONT",     [19] = "SIGSTOP",     [20] = "SIGTSTP",
    [21] = "SIGTTIN",     [22] = "SIGTTOU",     [23] = "SIGURG",      [24] = "SIGXCPU",
    [25] = "SIGXFSZ",     [26] = "SIGVTALRM",   [27] = "SIGPROF",     [28] = "SIGWINCH",
    [29] = "SIGIO",       [30] = "SIGPWR",      [31] = "SIGSYS",      [34] = "SIGRTMIN",
    [35] = "SIGRTMIN+1",  [36] = "SIGRTMIN+2",  [37] = "SIGRTMIN+3",  [38] = "SIGRTMIN+4",
    [39] = "SIGRTMIN+5",  [40] = "SIGRTMIN+6",  [41] = "SIGRTMIN+7",  [42] = "SIGRTMIN+8",
    [43] = "SIGRTMIN+9",  [44] = "SIGRTMIN+10", [45] = "SIGRTMIN+11", [46] = "SIGRTMIN+12",
    [47] = "SIGRTMIN+13", [48] = "SIGRTMIN+14", [49] = "SIGRTMIN+15", [50] = "SIGRTMAX-14",
    [51] = "SIGRTMAX-13", [52] = "SIGRTMAX-12", [53] = "SIGRTMAX-11", [54] = "SIGRTMAX-10",
    [55] = "SIGRTMAX-9",  [56] = "SIGRTMAX-8",  [57] = "SIGRTMAX-7",  [58] = "SIGRTMAX-6",
    [59] = "SIGRTMAX-5",  [60] = "SIGRTMAX-4",  [61] = "SIGRTMAX-3",  [62] = "SIGRTMAX-2",
    [63] = "SIGRTMAX-1",  [64] = "SIGRTMAX",
};
#endif

/* Print the command-line help for `app_name`. */
static void usage(const char *app_name)
{
    printf("%s [-b] [-f file] -h\n"
           " -h help (show this usage text)\n"
           " -b run in the background\n"
           " -f file\n", app_name);
}

/*
 * Handler for SIGINT/SIGTERM/SIGBUS/SIGSEGV/SIGABRT.
 *
 * Default build: stops the hardware watchdog, then exits with status -1.
 * With WATCHDOG_CRASH_DUMP: prints the signal name, a backtrace and, for
 * fatal memory signals, the process maps, then exits with status -1.
 *
 * NOTE(review): printf(), mmap() and exit() are not async-signal-safe;
 * behaviour is kept as in the original.
 */
void sig_handler(int signo)
{
#if WATCHDOG_CRASH_DUMP
    void *frames[10];
    char cmd[64] = {0};
    char **symbols;
    const char *name = "UNKNOWN";
    int depth;
    int i;

    /* The table has gaps and a fixed size, so guard the lookup. */
    if (signo > 0 && (size_t)signo < sizeof(signal_str) / sizeof(signal_str[0]) &&
        signal_str[signo] != NULL)
        name = signal_str[signo];

    printf("\n\n[%s: %d] bitbox crashed by signal %s.\n", __func__, __LINE__, name);
    printf("Call Trace:\n");
    depth = backtrace(frames, 10);
    symbols = backtrace_symbols(frames, depth);
    if (symbols) {
        for (i = 0; i < depth; i++)
            printf(" %s\n", symbols[i]);
        free(symbols);
    } else {
        printf("Not Found\n\n");
    }

    if (signo == SIGSEGV || signo == SIGBUS || signo == SIGTRAP || signo == SIGABRT) {
        snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", (int)getpid());
        printf("Process maps:\n");
        system(cmd);
    }
#else
    (void)signo;
    wdt_stop_count();
#endif
    exit(-1);
}

/*
 * Entry point.
 *
 * Options: -b/-B daemonize, -f <file> (accepted and ignored), -h show help.
 * Returns 0 after -h or once a monitored process has gone missing, and -1 on
 * a bad option or when daemon() fails.
 */
int main(int argc, char **argv)
{
    int c;
    int daemonize = 0;

    printf("watchdog V1.7 start!!!!@_@\n");

    signal(SIGPIPE, SIG_IGN);
    signal(SIGINT, sig_handler);
    signal(SIGTERM, sig_handler);
    signal(SIGBUS, sig_handler);
    signal(SIGSEGV, sig_handler);
    signal(SIGABRT, sig_handler);

    while ((c = getopt(argc, argv, "bBf:h")) != -1) {
        switch (c) {
        case 'b':
        case 'B':
            daemonize = 1;
            break;
        case 'f':
            break; /* argument is parsed but not used */
        case 'h':
            usage(argv[0]);
            return 0;
        default:
            usage(argv[0]);
            return -1;
        }
    }

    /* run in the background */
    if (daemonize) {
        if (daemon(0, 1)) {
            perror("daemon");
            return -1;
        }
    }

    for (;;) {
        /* Sample each process once so the report below matches the decision. */
        bool mozart_ok = processExists("mozart");
        bool bitbox_ok = processExists("bitbox");
        bool mplayer_ok = processExists("mplayer");

        if (!(mozart_ok && bitbox_ok && mplayer_ok)) {
            /*
             * NOTE(review): if this happens on the very first pass, this
             * program has not armed the watchdog yet, so exiting here does
             * not by itself trigger a reset.
             */
            printf(" mozart or bitbox or mplayer is not exist, Reboot!!!!! \n");
            printf("Mozart process exist ???: %d\n", mozart_ok);
            printf("BitBox process exist ???: %d\n", bitbox_ok);
            printf("Mplayer process exist ???: %d\n", mplayer_ok);
            break;
        }

        wdt_start_count(WDT_TIMEOUT_MS);
        sleep(WDT_FEED_INTERVAL_S);
    }

    return 0;
}
程序運行起來後,通過 ps 可查看到:
S 0 241 1 7868 716 0:0 13:16 00:00:01 watchdog -b