守護進程:看門狗watchdog的添加

本文爲項目開發總結的原創文檔。

 

本項目,添加一個watchdog守護進程,用來監控環境的三大進程mozart、bitbox、mplayer,任何一個進程出現故障,整個環境進行重啓。

 

首先有經過版級驅動/arch/mips/xburst/soc-x1000/common# vim reset.c 
找到與看門狗有關的code[同事發現,牛!];所以主要是將核心代碼從內核空間搬移到用戶空間,及如何監控應用層的進程。
 
總體實現思路:
1.建立一個進程做爲守護進程:watchdog
  
進程的添加:
在configs下添加watchdog.mak;
在src下添加watchdog包,用於加入watchdog相關的code;Makefile;
 
進程的啓動:在app.c中的startall中調用mozart_system("watchdog -b");
 
2.調整進程優先級:
1)如何查看進程的優先級
2)如何修改進程的優先級;
 
3.watchdog守護進程如何監控mozart和bitbox和mplayer
咱們知道內核會經過/proc虛擬文件系統導出系統中正在運行的進程信息,每一個進程都有一個/proc/<pid>目錄。所以咱們能夠將檢測進程是否存在轉換爲檢測/proc/<pid>目錄是否存在,這樣就簡單多了。

如下文詳細代碼中的processExists(注意:下文的實現實際是經過 `ps | grep` 統計匹配到的進程數來判斷進程是否存在,並不是直接檢測/proc/<pid>目錄);

 

實現難點突破:

用戶空間和內核空間操做的都是虛擬地址。
1)若是是拿到的是物理地址,用戶空間能夠經過mmap的方式將物理地址轉成虛擬地址(每一次的地址值都不同),能夠直接對這個虛擬地址賦值。
以下:
static int dev_fd;
    dev_fd = open("/dev/mem", O_RDWR | O_NDELAY);
 
    if (dev_fd < 0) {
        printf("open(/dev/mem) failed.");
        return 0;
    }
 
    unsigned char *map_base=(unsigned char * ) mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd,  WDT_IOBASE (這個地址是物理地址!));
 
*( unsigned long*)(map_base + TCU_TSCR) = (1 << 16);
 
close(dev_fd);
 
2)而內核空間,從物理地址轉成虛擬地址,通常是固定的,例如物理地址0x10002000對應內核虛擬地址0xb0002000;
 
3)同一個物理地址轉成虛擬地址,用戶空間和內核空間是不相同的。
關於用戶空間和內核空間:
物理地址在內核空間和用戶空間映射地址不同~~~

 

看門狗實際上就是一個定時器,其硬件內部維護了一個計數的寄存器。每當時鐘信號到來時,計數寄存器減掉1。若是減到0,則重啓系統。

若是減到0以前,系統又設置計數寄存器到一個較大的值,則系統永遠不會重啓。

 

watchdog的基本實現原理是:

用戶空間程序打開 /dev/mem設備(俗稱「開門放狗」),

並經過寫看門狗寄存器啓動一個定時器(本項目wdt_start_count的入參是20000ms即20s),此後,用戶空間程序須要保證在20秒以內再次設定這個定時器(俗稱「按期喂狗」),每次寫操做會致使從新設定定時器(本項目是每sleep 10s從新去設定)。若是用戶空間程序在20秒以內沒有寫操做,定時器到期會致使一次系統Reboot操做(「狗咬人了」)。

 

watchdog.c 內容以下:

#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <execinfo.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <linux/input.h>

#define WDT_IOBASE (0x10002000)
#define MAP_SIZE        0xFF



#define JZ_EXTAL_RTC      32768     /* RTC extal freq: 32.768 KHz */
#define TCU_IOBASE      0x10002000
#define TCU_TSCR   (0x3C)   /* Timer Stop Clear Register */

#define WDT_TCSR                (0x0c)  /* rw, 32, 0x???????? */
#define WDT_TCER                (0x04)  /* rw, 32, 0x???????? */
#define WDT_TDR                 (0x00)  /* rw, 32, 0x???????? */
#define WDT_TCNT                (0x08)  /* rw, 32, 0x???????? */
#define TCU_TSSR   (0x2C)   /* Timer Stop Set Register */

static void wdt_start_count(int msecs)
{
    static int dev_fd;
    dev_fd = open("/dev/mem", O_RDWR | O_NDELAY);

    if (dev_fd < 0) {
        printf("open(/dev/mem) failed.");
        return 0;
    }

    unsigned char *map_base=(unsigned char * )mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd, WDT_IOBASE );

        int time = JZ_EXTAL_RTC / 64 * msecs / 1000;
        if(time > 65535)
                time = 65535;

#if 0
        outl(1 << 16,TCU_IOBASE + TCU_TSCR);

        outl(0,WDT_IOBASE + WDT_TCNT);          //counter
        outl(time,WDT_IOBASE + WDT_TDR);        //data
        outl((3<<3 | 1<<1),WDT_IOBASE + WDT_TCSR);
        outl(0,WDT_IOBASE + WDT_TCER);
        outl(1,WDT_IOBASE + WDT_TCER);
#endif

/*上文屏蔽部分是內核空間對寄存器的操做,修改爲用戶空間對寄存器的操做,關鍵是物理地址在用戶空間須要經過mmap進行轉換*/

// printf("wdt_start_count begin~~~. map_base = %p,time=%d\n",map_base,time); *( unsigned long*)(map_base + TCU_TSCR) = (1 << 16); *( unsigned long*)(map_base + WDT_TCNT) = 0;//counter *( unsigned long*)(map_base + WDT_TDR) = time;//data *( unsigned long*)(map_base + WDT_TCSR) = (3<<3 | 1<<1); *( unsigned long*)(map_base + WDT_TCER) = 0; *( unsigned long*)(map_base + WDT_TCER) = 1; close(dev_fd); // printf("wdt_start_count end.\n"); } static void wdt_stop_count(void) { static int dev_fd; dev_fd = open("/dev/mem", O_RDWR | O_NDELAY); if (dev_fd < 0) { printf("open(/dev/mem) failed."); return 0; } unsigned char *map_base=(unsigned char * )mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd, WDT_IOBASE ); printf("\033[1;33mwdt_stop_count begin~~~. map_base = %p\n\033[m",map_base); #if 0 outl(1 << 16,TCU_IOBASE + TCU_TSCR); outl(0,WDT_IOBASE + WDT_TCNT); //counter outl(65535,WDT_IOBASE + WDT_TDR); //data outl(1 << 16,TCU_IOBASE + TCU_TSSR); #endif *( unsigned long*)(map_base + TCU_TSCR) = (1 << 16); *( unsigned long*)(map_base + WDT_TCNT) = 0;//counter *( unsigned long*)(map_base + WDT_TDR) = 65535; *( unsigned long*)(map_base + TCU_TSSR) = (1 << 16); close(dev_fd); printf("wdt_stop_count end.\n"); } void jz_wdt_restart() { printf("Restarting after 4 ms\n"); while(1) { wdt_start_count(20000); sleep(10); } while(1) printf("check wdt.\n"); } /*判 斷 進 程 是 否 存 在*/ bool processExists(char * process_name) { FILE *ptr; int RE_BUF_SIZE = 32; char rebuff[RE_BUF_SIZE]; char ps[128]; snprintf(ps, sizeof(ps), "ps | grep %s |grep -v grep| wc -l", process_name); if((ptr = popen(ps, "r")) != NULL) { int count = 0; fgets(rebuff, RE_BUF_SIZE, ptr); if(rebuff != NULL) { count = atoi(rebuff); } pclose(ptr); return count >= 1; } printf("Current process %s is not Exist!!!!\n",process_name); return false; } static char *signal_str[] = { [1] = "SIGHUP", [2] = "SIGINT", [3] = "SIGQUIT", [4] = "SIGILL", [5] = "SIGTRAP", [6] = "SIGABRT", [7] = "SIGBUS", [8] = "SIGFPE", [9] = 
"SIGKILL", [10] = "SIGUSR1", [11] = "SIGSEGV", [12] = "SIGUSR2", [13] = "SIGPIPE", [14] = "SIGALRM", [15] = "SIGTERM", [16] = "SIGSTKFLT", [17] = "SIGCHLD", [18] = "SIGCONT", [19] = "SIGSTOP", [20] = "SIGTSTP", [21] = "SIGTTIN", [22] = "SIGTTOU", [23] = "SIGURG", [24] = "SIGXCPU", [25] = "SIGXFSZ", [26] = "SIGVTALRM", [27] = "SIGPROF", [28] = "SIGWINCH", [29] = "SIGIO", [30] = "SIGPWR", [31] = "SIGSYS", [34] = "SIGRTMIN", [35] = "SIGRTMIN+1", [36] = "SIGRTMIN+2", [37] = "SIGRTMIN+3", [38] = "SIGRTMIN+4", [39] = "SIGRTMIN+5", [40] = "SIGRTMIN+6", [41] = "SIGRTMIN+7", [42] = "SIGRTMIN+8", [43] = "SIGRTMIN+9", [44] = "SIGRTMIN+10", [45] = "SIGRTMIN+11", [46] = "SIGRTMIN+12", [47] = "SIGRTMIN+13", [48] = "SIGRTMIN+14", [49] = "SIGRTMIN+15", [50] = "SIGRTMAX-14", [51] = "SIGRTMAX-13", [52] = "SIGRTMAX-12", [53] = "SIGRTMAX-11", [54] = "SIGRTMAX-10", [55] = "SIGRTMAX-9", [56] = "SIGRTMAX-8", [57] = "SIGRTMAX-7", [58] = "SIGRTMAX-6", [59] = "SIGRTMAX-5", [60] = "SIGRTMAX-4", [61] = "SIGRTMAX-3", [62] = "SIGRTMAX-2", [63] = "SIGRTMAX-1", [64] = "SIGRTMAX", }; static void usage(const char *app_name) { printf("%s [-f file] -h\n" " -h help (show this usage text)\n" " -f file\n", app_name); return; } void sig_handler(int signo) { char cmd[64] = {}; void *array[10]; int size = 0; char **strings = NULL; int i = 0; #if 0 printf("\n\n[%s: %d] bitbox crashed by signal %s.\n", __func__, __LINE__, signal_str[signo]); printf("Call Trace:\n"); size = backtrace(array, 10); strings = backtrace_symbols(array, size); if (strings) { for (i = 0; i < size; i++) printf (" %s\n", strings[i]); free (strings); } else { printf("Not Found\n\n"); } if (signo == SIGSEGV || signo == SIGBUS || signo == SIGTRAP || signo == SIGABRT) { sprintf(cmd, "cat /proc/%d/maps", getpid()); printf("Process maps:\n"); system(cmd); } #else wdt_stop_count(); #endif exit(-1); } int main(int argc, char **argv) { int c = -1; int daemonize = 0; printf("watchdog V1.7 start!!!!@_@\n"); signal(SIGPIPE, SIG_IGN); 
signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); signal(SIGBUS, sig_handler); signal(SIGSEGV, sig_handler); signal(SIGABRT, sig_handler); while (1) { c = getopt(argc, argv, "bBf:h"); if (c < 0) break; switch (c) { case 'b': case 'B': daemonize = 1; break; case 'f': break; case 'h': return 0; default: return -1; } } /* run in the background */ if (daemonize) { if (daemon(0, 1)) { perror("daemon"); return -1; } } while(1) { if(processExists("mozart")==true && processExists("bitbox")==true && processExists("mplayer")==true) { // printf("Both mozart and bitbox and mplayer are exists!!!!!\n"); wdt_start_count(20000); sleep(10); } else { printf(" mozart or bitbox or mplayer is not exist, Reboot!!!!! \n"); printf("Mozart process exist ???: %d\n",processExists("mozart")); printf("BitBox process exist ???: %d\n",processExists("bitbox")); printf("Mplayer process exist ???: %d\n",processExists("mplayer")); break; } } }

 

 

程序運行起來後,經過ps可查看到:

S 0 241 1 7868 716 0:0 13:16 00:00:01 watchdog -b

相關文章
相關標籤/搜索