#!/bin/bash
#
# Memory watchdog for a multi-instance squid setup.
#
# What it does:
#   * When free system memory gets very low, picks squid processes and
#     restarts them to release memory.
#   * Periodically checks every instance: starts instances that are not
#     running and repairs missing/stale pid files.
#
# All helper functions communicate through global variables (CONF, INSTANCE,
# PID_FILE, PID_FROM_FILE, RSS, process_state), exactly as the mail template
# in notify_admin expects.

MAIL="gongfan193@gmail.com"              # administrator mailbox to notify
MAIL_FROM="oom_of_squid@gwbnsh.net.cn"
CHECK_INTERVAL="3s"                      # pause between checks
THRESHOLD_LOW_MEM="90"                   # MB, low-memory threshold; must be greater than vm.min_free_kbytes
THRESHOLD_CHILD_MAX_MEM="200"            # MB, a child holding more than this is restarted
THRESHOLD_EMERG="100"                    # MB, restart threshold for children while LEVEL="emerg"
LEVEL="normal"                           # memory level: "emerg" while free memory is low, otherwise "normal"
LOG="$0.log"

# Discover config files: squid7N.conf are parents, squid8N.conf are children.
# nullglob keeps an unmatched pattern from being used as a literal file name,
# and "cd ... &&" makes a missing directory yield an empty list.
CONFIG_LOCATION="/etc/squid/"
PARENT_CONF=$(cd "$CONFIG_LOCATION" && shopt -s nullglob && echo squid7[0-9].conf)
CHILD_CONF=$(cd "$CONFIG_LOCATION" && shopt -s nullglob && echo squid8[0-9].conf)
ALL_CONF="$PARENT_CONF $CHILD_CONF"

# Keep the OOM killer away from this watchdog. oom_score_adj is the current
# interface; oom_adj (-17) is the deprecated one, used only as a fallback.
if [ -w /proc/self/oom_score_adj ]; then
    echo "-1000" > /proc/self/oom_score_adj
else
    echo "-17" > /proc/self/oom_adj
fi

# Print the current time as "YYYY-MM-DD HH:MM:SS" for log lines.
now() {
    date '+%F %T'
}

# Print the reclaimable free memory in MB (MemFree + Buffers + Cached).
# Read from /proc/meminfo instead of parsing `free`, whose column layout
# differs between procps versions.
all_free_mem() {
    awk '/^(MemFree|Buffers|Cached):/ { kb += $2 }
         END { printf "%d\n", kb / 1024 }' /proc/meminfo
}

# Measure the resident memory of the instance named by global CONF.
# Sets globals: INSTANCE, PID_FILE, PID_FROM_FILE, RSS (MB; 0 on failure).
# Returns 0 when RSS was measured, 1 when the pid file is missing or does
# not point at a squid process.
squid_instance_mem() {
    INSTANCE=${CONF%.conf}
    PID_FILE="/var/run/$INSTANCE.pid"
    PID_FROM_FILE=""
    RSS=0

    if [ ! -e "$PID_FILE" ]; then
        echo "$INSTANCE pid file not found" >> "$LOG"
        return 1
    fi
    PID_FROM_FILE=$(cat "$PID_FILE")

    # The pid must be a plain number and its process name must be squid.
    case "$PID_FROM_FILE" in
        ''|*[!0-9]*)
            echo "$INSTANCE pid file not match to squid" >> "$LOG"
            return 1
            ;;
    esac
    if ! grep -q squid "/proc/$PID_FROM_FILE/comm" 2>/dev/null; then
        echo "$INSTANCE pid file not match to squid" >> "$LOG"
        return 1
    fi

    # Resident set size, converted from kB to MB.
    RSS=$(awk '/^VmRSS:/ {printf "%d\n", $2/1024}' "/proc/$PID_FROM_FILE/status" 2>/dev/null)
    if [ -z "$RSS" ]; then
        RSS=0
        return 1
    fi
    return 0
}

# Check every instance; start the ones that died and repair bad pid files.
# Sets process_state for notify_admin while working and unsets it at the end.
keep_processes_health() {
    for CONF in $ALL_CONF; do
        INSTANCE=${CONF%.conf}
        PID_FILE="/var/run/$INSTANCE.pid"
        STATE_FILE="/var/lib/init.d/started/$INSTANCE"
        if [ -e "$PID_FILE" ]; then
            PID_FROM_FILE=$(cat "$PID_FILE")
        else
            PID_FROM_FILE=""
        fi
        # Processes owned by user "squid" whose command line mentions this config.
        PID_RUNNING=$(ps axo user,pid,cmd | awk -v conf="$CONF" '/^squid/ && index($0, conf) {print $2}')

        if [ -z "$PID_RUNNING" ]; then
            # Process does not exist: clean up leftovers and start it.
            process_state="not_running"
            echo "$(now) $INSTANCE state is $process_state, restarted" >> "$LOG"
            [ -e "$PID_FILE" ] && /bin/rm -f "$PID_FILE"
            [ -e "$STATE_FILE" ] && /bin/rm -f "$STATE_FILE"
            /etc/init.d/squid_multi_instance start "${INSTANCE#squid}" >/dev/null 2>&1
            notify_admin &
        elif [ -z "$PID_FROM_FILE" ]; then
            # Pid file missing or empty: rewrite it.
            process_state="bad_pid_file"
            if echo $PID_RUNNING > "$PID_FILE"; then
                process_state="good"
                echo "fixed pid file of $INSTANCE at $(now)" >> "$LOG"
            else
                echo "can not write $PID_FILE" >> "$LOG"
                process_state="pid_file_not_writeable"
                notify_admin &
            fi
        elif [ "$PID_FROM_FILE" != "$PID_RUNNING" ]; then
            # Pid file disagrees with the running process: rewrite it.
            process_state="pid_not_equal"
            if echo $PID_RUNNING > "$PID_FILE"; then
                echo "fixed $process_state of $INSTANCE at $(now)" >> "$LOG"
                notify_admin &
            else
                echo "can not write $PID_FILE" >> "$LOG"
                process_state="$process_state pid_file_not_writeable"
                notify_admin &
            fi
        else
            # Pid file matches the running process: healthy.
            process_state="good"
        fi
    done
    unset process_state
}

# Restart the instance named by global CONF and mail the administrator.
# Refuses to run with an empty instance name so the init script is never
# invoked without an instance argument.
restart_process() {
    INSTANCE=${CONF%.conf}
    if [ -z "$INSTANCE" ]; then
        echo "restart_process called without an instance at $(now)" >> "$LOG"
        return 1
    fi
    /etc/init.d/squid_multi_instance restart "${INSTANCE#squid}" >/dev/null 2>&1
    notify_admin &
}

# Restart the parent instance that currently holds the most memory.
pick_and_restart_parent() {
    local biggest_conf="" biggest_rss=-1

    for CONF in $PARENT_CONF; do
        # Skip parents whose memory cannot be measured.
        squid_instance_mem || continue
        # -ge: on a tie the later config wins, as with "sort -n | tail -1".
        if [ "$RSS" -ge "$biggest_rss" ]; then
            biggest_rss=$RSS
            biggest_conf=$CONF
        fi
    done

    if [ -n "$biggest_conf" ]; then
        CONF=$biggest_conf
        process_state="low_mem_restart_parent"
        restart_process
        echo "restarted parent ${CONF%.conf} at $(now)" >> "$LOG"
    else
        echo "unknow error in pick_and_restart_parent" >> "$LOG"
    fi
}

# Restart every child holding more than THRESHOLD_CHILD_MAX_MEM MB; while
# LEVEL is "emerg", additionally restart children above THRESHOLD_EMERG MB.
pick_and_restart_child() {
    for CONF in $CHILD_CONF; do
        squid_instance_mem || continue
        if [ "$RSS" -gt "$THRESHOLD_CHILD_MAX_MEM" ]; then
            process_state="over_THRESHOLD_CHILD_MAX_MEM"
            echo "$process_state restarted child ${CONF%.conf} at $(now)" >> "$LOG"
            restart_process
        fi
    done

    if [ "$LEVEL" = "emerg" ]; then
        for CONF in $CHILD_CONF; do
            squid_instance_mem || continue
            if [ "$RSS" -gt "$THRESHOLD_EMERG" ]; then
                process_state="over_THRESHOLD_EMERG"
                echo "$process_state restarted child ${CONF%.conf} at $(now)" >> "$LOG"
                restart_process
            fi
        done
    fi
    return 0
}

# Escalating low-memory response: children first, then the largest parent,
# then the "child" target of the init script.
pick_and_restart_one() {
    # Restart children first.
    pick_and_restart_child

    # Memory still low: restart a parent.
    if [ "$(all_free_mem)" -lt "$THRESHOLD_LOW_MEM" ]; then
        pick_and_restart_parent
    fi

    # Memory still low: restart all children.
    # NOTE(review): relies on the init script accepting "child" as an
    # instance argument meaning all children -- confirm against the init script.
    if [ "$(all_free_mem)" -lt "$THRESHOLD_LOW_MEM" ]; then
        # Subshell keeps the temporary CONF/INSTANCE/process_state local.
        ( CONF="child"; process_state="low_mem_restart_all_child"; restart_process )
    fi
    return 0
}

# Mail the administrator about INSTANCE, reporting process_state and the
# current free memory. Callers run this in the background.
notify_admin() {
    sendmail -t -f "$MAIL_FROM" <<EOF
To: $MAIL
From: $MAIL_FROM
Subject: $INSTANCE on $(hostname) restarted

重啓過了 $(hostname) 上的 $INSTANCE 進程
$INSTANCE 的最後狀態爲: $process_state
當前系統總剩餘內存爲: $(all_free_mem)
.
EOF
}

# Main loop.
while true; do
    keep_processes_health

    # Restart children above THRESHOLD_CHILD_MAX_MEM without waiting for
    # total free memory to run low.
    pick_and_restart_child

    if [ "$(all_free_mem)" -lt "$THRESHOLD_LOW_MEM" ]; then
        LEVEL="emerg"
        echo "low memory at $(now)" >> "$LOG"
        pick_and_restart_one
    else
        LEVEL="normal"
        sleep $CHECK_INTERVAL
    fi
done

# vim: set sw=4 ts=4: