Nagios客戶端安裝

時間 2021-01-28

標籤 php html node python mysql linux ios web sql 數據庫欄目 PHP 简体版

原文原文鏈接

1.更新yum源

mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
wget -O /etc/yum.repos.d/CentOS-Base.repo  http://mirrors.aliyun.com/repo/Centos-6.repo

2.調整字符集

echo 'export LC_ALL=C'>>/etc/profile
source /etc/profile

3.關閉iptables與selinux

4.更新系統時間

/usr/sbin/ntpdate pool.ntp.org

然後放入定時任務（crontab），例如：*/5 * * * * /usr/sbin/ntpdate pool.ntp.org >/dev/null 2>&1

5.安裝gcc（可以省略）

yum install gcc glibc glibc-common mysql* -y

6.添加用戶

/usr/sbin/useradd -m nagios -s /sbin/nologin

7.安裝插件

yum -y install perl-devel
tar xf nagios-plugins-1.4.16.tar.gz
cd nagios-plugins-1.4.16
./configure --prefix=/usr/local/nagios --enable-perl-modules --enable-redhat-pthread-workaround 
make && make install
cd ../
ls /usr/local/nagios/libexec/|wc -l

8.安裝nrpe

tar xf nrpe-2.12.tar.gz
cd nrpe-2.12
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
cd ../

9.安裝soft與iostat

# Unpack, build and install one Perl distribution from a source tarball that
# sits in the current directory.
# Arguments: $1 - distribution name without the ".tar.gz" suffix
# Returns:   non-zero as soon as any step fails. The caller's working
#            directory is never changed.
install_perl_module() {
  local dist=$1

  tar xf "${dist}.tar.gz" || return

  # Build inside a subshell: if cd fails nothing is built in the wrong
  # directory, and no "cd ../" is needed afterwards.
  (
    cd "$dist" || exit
    perl Makefile.PL && make && make install
  )
}

# Same order as the manual steps: Nagios-Plugin is installed last, after the
# other modules.
for dist in \
  Params-Validate-0.91 \
  Class-Accessor-0.31 \
  Config-Tiny-2.12 \
  Math-Calc-Units-1.07 \
  Regexp-Common-2010010201 \
  Nagios-Plugin-0.34
do
  if ! install_perl_module "$dist"; then
    printf 'failed to install %s, stopping\n' "$dist" >&2
    break
  fi
done

yum install sysstat -y
cp check_memory.pl /usr/local/nagios/libexec
cp check_iostat /usr/local/nagios/libexec
chmod 755 /usr/local/nagios/libexec/check_memory.pl
chmod 755 /usr/local/nagios/libexec/check_iostat
dos2unix /usr/local/nagios/libexec/check_memory.pl
dos2unix /usr/local/nagios/libexec/check_iostat

10.修改nrpe.cfg

[root@Nagios tools]# vim /usr/local/nagios/etc/nrpe.cfg +79

修改79行：allowed_hosts=127.0.0.1

爲：allowed_hosts=127.0.0.1,192.168.0.150 #容許哪一個服務端(192.168.0.150)對它發送命令

刪除199行到203行：

199 command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10

200 command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20

201 command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1

202 command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z

203 command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200

echo "command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,6 -c 30,25,20">>/usr/local/nagios/etc/nrpe.cfg

echo "command[check_mem]=/usr/local/nagios/libexec/check_memory.pl -w 6% -c 3%">>/usr/local/nagios/etc/nrpe.cfg

echo "command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 8% -p /">>/usr/local/nagios/etc/nrpe.cfg

echo "command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%">>/usr/local/nagios/etc/nrpe.cfg

echo "command[check_iostat]=/usr/local/nagios/libexec/check_iostat -w 6 -c 10">>/usr/local/nagios/etc/nrpe.cfg

11.啓動nagios客戶端

pkill nrpe

/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

echo "#nagios nrpe process cmd by oldboy 2012-6-7" >> /etc/rc.local

echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d">>/etc/rc.local

[root@Nagios tools]# netstat -lnt|grep 5666 && echo "nagios client is ok"

tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN

nagios client is ok

配置服務端主配置文件：

[root@Nagios etc]# vim nagios.cfg

在34行增長以下兩行：

cfg_file=/usr/local/nagios/etc/objects/services.cfg

cfg_file=/usr/local/nagios/etc/objects/hosts.cfg

註釋掉38行：

cfg_file=/usr/local/nagios/etc/objects/localhost.cfg

在38行最前面輸入"#"

#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg

按服務的目錄功能分(演示)：

54行：#cfg_dir=/usr/local/nagios/etc/services

修改成：cfg_dir=/usr/local/nagios/etc/services

[root@Nagios etc]# mkdir services

[root@Nagios etc]# chown -R nagios.nagios services/

[root@Nagios etc]# cd objects

[root@Nagios objects]# ls

commands.cfg localhost.cfg switch.cfg timeperiods.cfg

contacts.cfg printer.cfg templates.cfg windows.cfg

[root@Nagios objects]# head -51 localhost.cfg>hosts.cfg

[root@Nagios objects]# chown nagios.nagios hosts.cfg

[root@Nagios objects]# ls

commands.cfg localhost.cfg templates.cfg

contacts.cfg printer.cfg timeperiods.cfg

hosts.cfg switch.cfg windows.cfg

[root@Nagios objects]# touch services.cfg

[root@Nagios objects]# chown nagios.nagios services.cfg

Nagios監控模式的定義和約定:

1.按監控行爲分類：

主動監控

和nrpe無關，就是利用服務端本地插件直接獲取信息。

被動監控

主程序經過check_nrpe插件，和客戶端nrpe進程溝通，調用本地插件獲取數據。

2.如何選擇主動模式和被動模式

1）對於本地的資源性能等的監控，通常用被動模式，如負載，內存，硬盤，虛擬內存，磁盤IO，溫度，風扇等的監控（咱們也能夠經過snmp實現監控）

2）對於web服務，數據庫服務這種能對外提供服務的監控，通常用主動模式，如監控httpd,sshd,mysqld,rsyncd等的服務

3）主動模式和被動模式是相對的，而且是能夠互相轉換的，即主動模式的服務，能夠改爲被動模式，被動模式的服務有時也能夠改成主動模式。

實戰配置Nagios主機及服務實戰：

[root@Nagios etc]# cd objects

[root@Nagios objects]# vim hosts.cfg

define host{ #定義主機

use linux-server #使用的模板

host_name 151-web01 #被監控的主機名

alias 151-web01 #被監控的主機別名

address 192.168.0.151 #被監控的主機IP地址

}

define host{

use linux-server

host_name 150-nagios_server

alias 150-nagios_server

address 192.168.0.150

}

define hostgroup{ #定義主機組

hostgroup_name linux-servers

alias Linux Servers

members 151-web01,150-nagios_server #把上面定義的主機加到這一行，多個主機之間用逗號（，）分隔。

}

[root@Nagios objects]# /etc/init.d/nagios checkconfig #檢查配置語法

Running configuration check... CONFIG ERROR! Check your Nagios configuration.

[root@Nagios objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #檢查配置語法

。。。。。。。。。。。。。。。。。。。。。。。。。

Read object config files okay...

Running pre-flight check on configuration data...

Checking services...

Error: There are no services defined! #錯誤緣由，沒有定義服務

Checked 0 services.

。。。。。。。。。。。。。。。。。。。。。。。。。

Total Warnings: 2

Total Errors: 1

。。。。。。。。。。。。。。。。。。。。。。。。。。

[root@Nagios objects]# vim /etc/init.d/nagios +183

183 $NagiosBin -v $NagiosCfgFile > /dev/null 2>&1;

修改成：

$NagiosBin -v $NagiosCfgFile

[root@Nagios objects]# /etc/init.d/nagios checkconfig #檢查配置語法

。。。。。。。。。。。。。。。。。。。。。。。。。

Read object config files okay...

Running pre-flight check on configuration data...

Checking services...

Error: There are no services defined!

Checked 0 services.

。。。。。。。。。。。。。。。。。。。。。。。。。

Total Warnings: 2

Total Errors: 1

。。。。。。。。。。。。。。。。。。。。。。。。。。

[root@Nagios objects]# vim services.cfg #編輯services.cfg服務配置文件，定義一個服務

define service {

use generic-service

host_name 151-web01

service_description Disk Partition

check_command check_nrpe!check_disk

}

[root@Nagios objects]# /etc/init.d/nagios checkconfig

。。。。。。。。。。。。。。。。。。。。。。。。。

Checking services...

Error: Service check command 'check_nrpe' specified in service 'Disk Partition' for host '151-web01' not defined anywhere! #錯誤緣由，沒有定義服務檢測（check_nrpe）命令

Checked 1 services.

Checking hosts...

Warning: Host '150-nagios_server' has no services associated with it!

Checked 2 hosts.

Checking host groups...

Checked 1 host groups.

Checking service groups...

。。。。。。。。。。。。。。。。。。。。。。。。。

Total Warnings: 1

Total Errors: 1

。。。。。。。。。。。。。。。。。。。。。。。。。。

CONFIG ERROR! Check your Nagios configuration.

[root@Nagios objects]# vim commands.cfg

在39行增長以下內容，定義服務檢測命令

39 # 'check_nrpe' command definition

40 define command {

41 command_name check_nrpe

42 command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$

43 }

[root@Nagios objects]# /etc/init.d/nagios checkconfig

。。。。。。。。。。。。。。。。。。。。。。。。。。

Read object config files okay...

Running pre-flight check on configuration data...

Checking services...

Checked 1 services.

Checking hosts...

Warning: Host '150-nagios_server' has no services associated with it!

Checked 2 hosts.

Checking host groups...

Checked 1 host groups.

。。。。。。。。。。。。。。。。。。。。。。。。。。。。

Total Warnings: 1

Total Errors: 0

Things look okay - No serious problems were detected during the pre-flight check

OK.

[root@Nagios objects]# /usr/local/nagios/libexec/check_nrpe -H 192.168.0.151 -c check_disk

DISK OK - free space: / 8970 MB (69% inode=92%);| /=3855MB;10814;12436;0;13518

[root@Nagios objects]# /etc/init.d/nagios reload #從新加載nagios配置

Running configuration check...done.

Reloading nagios configuration...done

在服務端的web界面查看主機和服務時出現以下錯誤：

It appears as though you do not have permission to view information for any of the hosts you requested...

If you believe this is an error, check the HTTP server authentication requirements for accessing this CGI

and check the authorization options in your CGI configuration file.

或

It appears as though you do not have permission to view information for any of the services you requested...

If you believe this is an error, check the HTTP server authentication requirements for accessing this CGI

and check the authorization options in your CGI configuration file.

解決方法：

[root@Nagios etc]# vim cgi.cfg

修改：

157 authorized_for_all_services=nagiosadmin

158 authorized_for_all_hosts=nagiosadmin

爲：

157 authorized_for_all_services=oldboy

158 authorized_for_all_hosts=oldboy

由於前面定義驗證用戶時，定義成oldboy用戶了

[root@Nagios etc]# /etc/init.d/nagios reload

Running configuration check...done.

Reloading nagios configuration...done

如下爲web正常結果：

Host Status Details For All Host Groups

Limit Results:

Host Sort by host name (ascending)Sort by host name (descending)Status Sort by host status (ascending)Sort by host status (descending)Last Check Sort by last check time (ascending)Sort by last check time (descending)Duration Sort by state duration (ascending)Sort by state duration time (descending)Status Information

150-nagios_server

View Service Details For This Host

UP 01-02-2016 21:46:40 0d 0h 1m 46s+ PING OK - Packet loss = 0%, RTA = 0.05 ms

151-web01

View Service Details For This Host

UP 01-02-2016 21:49:00 0d 0h 3m 12s PING OK - Packet loss = 0%, RTA = 0.27 ms

Service Status Details For All Hosts

Limit Results:

Host Sort by host name (ascending)Sort by host name (descending)Service Sort by service name (ascending)Sort by service name (descending)Status Sort by service status (ascending)Sort by service status (descending)Last Check Sort by last check time (ascending)Sort by last check time (descending)Duration Sort by state duration (ascending)Sort by state duration time (descending)Attempt Sort by current attempt (ascending)Sort by current attempt (descending)Status Information

151-web01

Disk Partition

OK 01-02-2016 21:46:10 0d 0h 4m 18s 1/3 DISK OK - free space: / 8970 MB (69% inode=92%):

Nagios排錯及監控Nagios經常使用本地資源:

[root@Nagios objects]# vim services.cfg #增長以下服務

define service { #http服務本身增長的

use generic-service

host_name 150-nagios_server

service_description http

check_command check_nrpe!check_http

}

define service {

use generic-service

host_name 150-nagios_server

service_description load

check_command check_nrpe!check_load

}

define service {

use generic-service

host_name 150-nagios_server

service_description mem

check_command check_nrpe!check_mem

}

define service {

use generic-service

host_name 150-nagios_server

service_description swap

check_command check_nrpe!check_swap

}

define service {

use generic-service

host_name 150-nagios_server

service_description iostat

check_command check_nrpe!check_iostat

}

[root@Nagios objects]# /etc/init.d/nagios checkconfig

。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。

Total Warnings: 0

Total Errors: 0

Things look okay - No serious problems were detected during the pre-flight check

OK.

[root@Nagios objects]# /etc/init.d/nagios reload

Running configuration check...done.

Reloading nagios configuration...done

查看web中服務的狀態（在瀏覽器中）：

service Status

150-nagios_server http PENDING

出現如上錯誤，等一會就行了

以下出現以下錯誤：

150-nagios_server http CRITICAL 01-03-2016 21:18:19 0d 0h 2m 51s 2/3 NRPE: Command 'check_http' not defined

解決方法以下：

[root@Nagios objects]# cd ..

[root@Nagios etc]# ls

cgi.cfg nagios.cfg nrpe.cfg.ori resource.cfg

htpasswd.users nrpe.cfg objects services

[root@Nagios etc]# vim nrpe.cfg

在最後一行後面加入以下內容：

command[check_http]=/usr/local/nagios/libexec/check_http -w $ARG1$ -c $ARG2$

[root@Nagios etc]# pkill nrpe

[root@Nagios etc]# ps -ef|grep nrpe

root 13431 12823 0 21:29 pts/0 00:00:00 grep nrpe

[root@Nagios etc]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

[root@Nagios etc]# ps -ef|grep nrpe

nagios 13433 1 0 21:29 ? 00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

root 13435 12823 0 21:29 pts/0 00:00:00 grep nrpe

出現以下錯誤：

Status

CRITICAL

Status Information

HTTP CRITICAL: HTTP/1.1 403 Forbidden - 5159 bytes in 0.013 second response time

HTTP CRITICAL: HTTP/1.1 200 OK - 254 bytes in 0.001 second response time

解決方法以下：

http服務用後面的主動模式監控！！！！！！！！

Nagios(被動模式)實戰配置過程總結說明：

1.添加主機和主機組，若是有多臺機器，用逗號分隔

2.添加服務，能夠定義多個服務（被動模式，用check_nrpe!check_http)

3.檢查語法

4.重啓nagios服務（從新加載配置，能夠reload)

端口及url地址(主動模式）監控企業案例實戰：

[root@Nagios nagios]# cd libexec

[root@Nagios libexec]# ./check_tcp --help

。。。。。。。。。。。。。。。

Usage:

check_tcp -H host -p port

[root@Nagios libexec]# ./check_tcp -H 192.168.0.151 -p 80

TCP OK - 0.001 second response time on port 80|time=0.000811s;;;0.000000;10.000000

[root@Nagios libexec]# ./check_http -H 192.168.0.151 -p 80

HTTP OK: HTTP/1.1 200 OK - 254 bytes in 0.001 second response time |time=0.000744s;;;0.000000 size=254B;;;0

[root@Nagios libexec]# ./check_http --help

。。。。。。。。。。。。。。。。。。。。。。。。

Usage:

check_http -H <vhost> | -I <IP-address> [-u <uri>] [-p <port>]

[root@Nagios etc]# cd services

[root@Nagios services]# vim http.cfg

define service {
      use                  generic-service
      host_name            150-nagios_server
      service_description  http
      check_command        check_weburl!-I 192.168.0.150
      max_check_attempts    3
      normal_check_interval 2
      retry_check_interval  1
      check_period          24x7
      notification_interval 30
      notification_period   24x7
      notification_options  w,u,c,r
      contact_groups        admins
     }
define service {
      use                  generic-service
      host_name            151-web01
      service_description  http
      check_command        check_weburl!-I 192.168.0.151
      max_check_attempts    3
      normal_check_interval 2
      retry_check_interval  1
      check_period          24x7
      notification_interval 30
      notification_period   24x7
      notification_options  w,u,c,r
      contact_groups        admins
     }

[root@Nagios objects]# vim commands.cfg

在'check_http' command 下面緊接着加入如下內容：

# 'check_weburl' command definition

define command{

command_name check_weburl

command_line $USER1$/check_http $ARG1$ -w 10 -c 30

}

[root@Nagios objects]# /etc/init.d/nagios checkconfig

.................................................

Total Warnings: 0

Total Errors: 0

Things look okay - No serious problems were detected during the pre-flight check

OK.

[root@Nagios objects]# /etc/init.d/nagios reload

Running configuration check...done.

Reloading nagios configuration...done

nagios 服務分組定義：

1.# vim nagios.cfg

cfg_file=/usr/local/nagios/etc/objects/servicegroups.cfg #添加這一行

2.在/usr/local/nagios/etc/objects/下創建servicegroups.cfg文件

3.# vim servicegroups.cfg

# check web

define servicegroup{

servicegroup_name check_web

alias check_web

members 155,check_w1,156,check_w2,157,check_w3

}

#注意：members 這裏要寫成：主機1，服務，主機2，服務...的形式，我以前就是這裏出錯了。

4.# checknagios #檢測配置文件

5.# service nagios restart

url高級監控及MySQL-Rsync監控實戰:

[root@Nagios services]# vim http.cfg

在最後面增長以下監控端口的服務：

define service {
      use                   generic-service
      host_name             151-web01
      service_description   port_80
      check_command         check_tcp!80
      max_check_attempts    3
      normal_check_interval 2
      retry_check_interval  1
      check_period          24x7
      notification_interval 30
      notification_period   24x7
      notification_options  w,u,c,r
      contact_groups        admins
     }
define service {
      use                   generic-service
      host_name             151-web01
      service_description   port_3306
      check_command         check_tcp!3306
      max_check_attempts    3
      normal_check_interval 2
      retry_check_interval  1
      check_period          24x7
      notification_interval 30
      notification_period   24x7
      notification_options  w,u,c,r
      contact_groups        admins
     }

URL監控：

define service {
      use                   generic-service
      host_name             151-web01
      service_description   URL
      check_command         check_weburl!-H www.test.com #此處定義的check_weburl和Nagios服務端自帶的check_http等價，所以也能夠在這用check_http
      max_check_attempts    3
      normal_check_interval 2
      retry_check_interval  1
      check_period          24x7
      notification_interval 30
      notification_period   24x7
      notification_options  w,u,c,r
      contact_groups        admins
     }

[root@Nagios services]# vi /etc/hosts

添加解析www.test.com的主機，即作hosts映射（DNS解析）

192.168.0.151 www.test.com

保存退出

[root@Nagios services]# curl www.test.com

192.168.0.151:test1-web

[root@Nagios services]# /usr/local/nagios/libexec/check_http -H www.test.com

HTTP OK: HTTP/1.1 200 OK - 254 bytes in 0.001 second response time |time=0.000978s;;;0.000000 size=254B;;;0

[root@Nagios services]# /etc/init.d/nagios reload

從新加載服務端的配置後，監控www.test.com成功

高級URL監控：

[root@Nagios services]# vim http.cfg

define service {

use generic-service

host_name 151-web01

service_description URL_3

check_command check_http!-H www.test.com -u "/test/55.html"

max_check_attempts 3

normal_check_interval 2

retry_check_interval 1

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c,r

contact_groups admins

}

[root@Nagios services]# /usr/local/nagios/libexec/check_http -H www.test.com -u "/test/55.html"

HTTP OK: HTTP/1.1 200 OK - 236 bytes in 0.001 second response time |time=0.000924s;;;0.000000 size=236B;;;0

[root@Nagios services]# /etc/init.d/nagios reload

Running configuration check...done.

Reloading nagios configuration...done

從新加載服務端的配置後，監控www.test.com/test/55.html成功

集羣節點監控-服務分組實例-被動監控實戰詳解一例:

方案：

利用別名實現對集羣下面一樣節點的URL監控

如：

web1 blog.etiantian.org,blog1.etiantian.org

web2 blog.etiantian.org,blog2.etiantian.org

被動模式監控80端口：

[root@web01 html]# cd /usr/local/nagios

[root@web01 nagios]# ls

bin etc include libexec perl share

[root@web01 nagios]# cd libexec

[root@web01 libexec]# ./check_tcp -H 192.168.0.151 -p 80

TCP OK - 0.000 second response time on port 80|time=0.000187s;;;0.000000;10.000000

[root@web01 libexec]# pwd

/usr/local/nagios/libexec

[root@web01 libexec]# /usr/local/nagios/libexec/check_tcp -H 192.168.0.151 -p 80

TCP OK - 0.000 second response time on port 80|time=0.000216s;;;0.000000;10.000000

[root@web01 nagios]# cd etc

[root@web01 etc]# ls

nrpe.cfg

[root@web01 etc]# ls

nrpe.cfg

[root@web01 etc]# vim nrpe.cfg

在最後面追加：

command[check_port]=/usr/local/nagios/libexec/check_tcp -H 192.168.0.151 -p 80 -w 5 -c 10

[root@web01 etc]# /usr/local/nagios/libexec/check_tcp -H 192.168.0.151 -p 80 -w 5 -c 10

TCP OK - 0.000 second response time on port 80|time=0.000214s;5.000000;10.000000;0.000000;10.000000

[root@web01 etc]# ps -ef|grep nrpe

nagios 21090 1 0 04:20 ? 00:00:01 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

root 21649 21201 0 07:05 pts/0 00:00:00 grep nrpe

[root@web01 etc]# pkill nrpe

[root@web01 etc]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

[root@web01 etc]# ps -ef|grep nrpe

nagios 21652 1 0 07:06 ? 00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

root 21654 21201 0 07:06 pts/0 00:00:00 grep nrpe

[root@Nagios services]# cd /usr/local/nagios/libexec

[root@Nagios libexec]# ./check_nrpe -H 192.168.0.151 -c check_port

TCP OK - 0.000 second response time on port 80|time=0.000181s;5.000000;10.000000;0.000000;10.000000

[root@Nagios services]# vim http.cfg

define service {
      use                   generic-service
      host_name             151-web01
      service_description   port_80_beidong
      check_command         check_nrpe!check_port
      max_check_attempts    3
      normal_check_interval 2
      retry_check_interval  1
      check_period          24x7
      notification_interval 30
      notification_period   24x7
      notification_options  w,u,c,r
      contact_groups        admins
     }

[root@Nagios services]# /etc/init.d/nagios reload

Running configuration check...done.

Reloading nagios configuration...done

呵呵，被動模式監控80端口成功

做業：監控mysql主從同步（check_mysql),請用主動及被動分別監控主從同步

自定義插件開發規範及監控密碼文件變化案例實戰詳解：

給Nagios插件程序提供兩個返回值：一個是插件的退出狀態碼，另外一個是插件在控制檯打印的第一行數據。

Nagios主程序可識別的狀態碼和說明以下：

OK ---退出代碼 0-表示服務正常的工做

WARNING ---退出代碼 1-表示服務處於警告狀態

CRITICAL ---退出代碼 2-表示服務處於緊急，嚴重狀態

UNKNOWN ---退出代碼 3-表示服務處於未知狀態

[root@Nagios libexec]# head -7 utils.sh

#! /bin/sh

STATE_OK=0

STATE_WARNING=1

STATE_CRITICAL=2

STATE_UNKNOWN=3

STATE_DEPENDENT=4

Nagios被動模式自定義插件開發及應用：

[root@web01 ~]# md5sum /etc/passwd

97fac7fd2707bfd9a0edddf7083afc55 /etc/passwd

[root@web01 ~]# md5sum /etc/passwd >/etc/zkq.md5

[root@web01 ~]# md5sum -c /etc/zkq.md5

/etc/passwd: OK

[root@web01 ~]# vim check_passwd

#!/bin/sh
# Nagios/NRPE plugin: report whether /etc/passwd still matches the checksum
# recorded in /etc/zkq.md5 (created with: md5sum /etc/passwd >/etc/zkq.md5).
# Output: one status line on stdout.
# Exit:   0 (OK) when the checksum matches, 2 (CRITICAL) otherwise.

# Count the "OK" lines printed by md5sum -c. stderr is discarded on purpose,
# so a missing checksum file simply yields a count of 0 ("changed").
char=$(md5sum -c /etc/zkq.md5 2>/dev/null | grep -c "OK")

if [ "$char" -eq 1 ]; then
  echo "passwd is ok"
  exit 0
else
  echo "passwd is changed"
  exit 2
fi

[root@web01 libexec]# chmod +x check_passwd

[root@web01 libexec]# ll /usr/local/nagios/libexec/check_passwd

-rwxr-xr-x 1 root root 167 Jan 3 09:52 /usr/local/nagios/libexec/check_passwd

[root@web01 libexec]# cd ..

[root@web01 nagios]# cd etc

[root@web01 etc]# ls

nrpe.cfg

[root@web01 etc]# vim nrpe.cfg

在最後一行加入以下內容：

command[check_passwd]=/usr/local/nagios/libexec/check_passwd

[root@web01 etc]# pkill nrpe

[root@web01 etc]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

[root@web01 etc]# ps -ef|grep nrpe

nagios 22735 1 0 10:00 ? 00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

root 22737 22269 0 10:00 pts/0 00:00:00 grep nrpe

[root@Nagios]# cd libexec

[root@Nagios libexec]# ./check_nrpe -H 192.168.0.151 -c check_passwd

passwd is ok

[root@Nagios libexec]# cd ..

[root@Nagios nagios]# cd etc

[root@Nagios etc]# ls

cgi.cfg nrpe.cfg resource.cfg

htpasswd.users nrpe.cfg.ori services

nagios.cfg objects

[root@Nagios etc]# cd objects

[root@Nagios objects]# ls

commands.cfg printer.cfg timeperiods.cfg

contacts.cfg services.cfg windows.cfg

hosts.cfg switch.cfg

localhost.cfg templates.cfg

[root@Nagios objects]# vim services.cfg

在最後加入如下內容：

define service {

use generic-service

host_name 151-web01

service_description check_passwd

check_command check_nrpe!check_passwd

}

[root@Nagios objects]# /etc/init.d/nagios checkconfig

。。。。。。。。。。。。。。。。。。。。。。。。

Total Warnings: 0

Total Errors: 0

Things look okay - No serious problems were detected during the pre-flight check

OK.

嘿嘿，被動模式自定義插件應用成功

Nagios圖形顯示介紹及實戰安裝:

#一、圖形顯示管理的依賴庫

yum install cairo pango zlib zlib-devel freetype freetype-devel gd gd-devel -y

#二、rrdtools的依賴

[root@Nagios tools]# tar xf libart_lgpl-2.3.17.tar.gz

cd libart_lgpl-2.3.17

./configure

make

make install

/bin/cp -r /usr/local/include/libart-2.0 /usr/include

cd ../

#三、rrdtools輪詢的數據庫，專門畫圖

[root@Nagios tools]# tar xf rrdtool-1.2.14.tar.gz

cd rrdtool-1.2.14

./configure --prefix=/usr/local/rrdtool --disable-python --disable-tcl

#WARNING: The RRDs Perl Modules are not found on your System

#Using RRDs will speedup things in larger Installtions.

#configure後出現上面的提示能夠不用理會。

make

make install

cd ../

ls -l /usr/local/rrdtool/bin

#四、PNP收集數據告訴rrdtools畫圖，PHP負責展現

tar zxf pnp-0.4.14.tar.gz

cd pnp-0.4.14

./configure --with-rrdtool=/usr/local/rrdtool/bin/rrdtool --with-perfdata-dir=/usr/local/nagios/share/perfdata/

#################

# WARNING: The RRDs Perl Modules are not found on your System

# Using RRDs will speedup things in larger Installtions.

#####################

make all

make install

make install-config

make install-init

ll /usr/local/nagios/libexec/ |grep process

問題：configure報錯（通常不會發生）

checking for linker flags for loadable modules... -shared

checking for Perl Module Time::HiRes... no

configure: error: Perl Module Time::HiRes not available

解決：

yum install perl-Time-HiRes -y

配置出圖

[root@Nagios ~]# cd /usr/local/nagios/etc

[root@Nagios etc]# vim nagios.cfg

833 process_performance_data=1<==========

834

835

836

837 # HOST AND SERVICE PERFORMANCE DATA PROCESSING COMMANDS

838 # These commands are run after every host and service check is

839 # performed. These commands are executed only if the

840 # enable_performance_data option (above) is set to 1. The command

841 # argument is the short name of a command definition that you

842 # define in your host configuration file. Read the HTML docs for

843 # more information on performance data.

844

845 host_perfdata_command=process-host-perfdata<==========

846 service_perfdata_command=process-service-perfdata<==========

2.執行編輯命令 vi /usr/local/nagios/etc/objects/commands.cfg +227，修改commands.cfg配置文件。

#修改commands.cfg 配置文件，約227-238行

#默認配置爲（若是沒有直接添加便可）

#-----------------------------------------------------------------

# 'process-host-perfdata' command definition

define command{

command_name process-host-perfdata

command_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out

}

# 'process-service-perfdata' command definition

define command{

command_name process-service-perfdata

command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out

}

修改爲以下配置

# 'process-host-perfdata' command definition

define command{

command_name process-host-perfdata

command_line /usr/local/nagios/libexec/process_perfdata.pl

}

# 'process-service-perfdata' command definition

define command{

command_name process-service-perfdata

command_line /usr/local/nagios/libexec/process_perfdata.pl

}

也能夠用nagios變量$USER1$替代/usr/local/nagios/libexec/路徑

執行檢查語法命令/etc/init.d/nagios checkconfig

Total Warnings: 0

Total Errors: 0

根據提示，配置經過。重啓。

三、主機出圖的配置，模板或主機配置裏hosts.cfg

action_url /nagios/pnp/index.php?host=$HOSTNAME$

四、服務出圖，模板或服務配置裏services.cfg

action_url /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$

snmp協議（cacti）,客戶端安裝或開啓SNMP

nrpe(NSClient++)

網絡設備（路由器，交換機），開啓SNMP

短信報警的腳本開發及實戰配置：

一、添加聯繫人及聯繫組contacts.cfg

define contact{
        contact_name                    oldboy-pager
        use                             generic-contact
        alias                            Nagios users
        pager                            18901398229
}

二、添加報警的命令commands.cfg

#command.cfg

# 'notify-host-by-pager' command definition

define command{
        command_name    notify-host-by-pager
        command_line    $USER1$/sms_send "Host $HOSTSTATE$ alert for $HOSTNAME$"  $CONTACTPAGER$
        }
# 'notify-service-by-pager' command definition
define command{
        command_name    notify-service-by-pager
        command_line    $USER1$/sms_send "$HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$"  $CONTACTPAGER$
        }

三、調整聯繫人模板，添加報警的命令（來自於commands.cfg）

編輯templates.cfg裏的定義內容爲：

define contact{
        name                            generic-contact 
        service_notification_period     24x7
        host_notification_period        24x7                    
        service_notification_options    w,u,c,r,f,s            
        host_notification_options       d,u,r,f,s                       
        service_notification_commands   notify-service-by-email,notify-service-by-fetion,notify-service-by-msn, notify-service-by-pager
        host_notification_commands      notify-host-by-email,notify-host-by-fetion,notify-host-by-msn, notify-host-by-pager 
        register                        0                               
}

四、hosts.cfg,services.cfg添加報警聯繫人及組，或者對應模板加

contact_groups admins,groups1,groups2,user01

開發短信報警腳本（短信網關是收費的）

[root@oldboy-A libexec]# pwd

/usr/local/nagios/libexec

[root@oldboy-A libexec]# cat sms_send

#!/bin/sh
# Send a Nagios alert as an SMS through the sdkhttp.eucp.b2m.cn HTTP gateway.
# Usage: sms_send "<alert text>" <phone>
#   $1 - alert text, FORMAT "Host $HOSTSTATE$ alert for $HOSTNAME$"
#   $2 - recipient mobile number
# Environment (optional): SMS_CDKEY and SMS_PASSWORD override the gateway
# account used below.
# NOTE(review): the fallback credentials are hard-coded; move real ones into
# the environment or a root-only file so they do not live in the script.

if [ "$#" -lt 2 ]; then
  echo "usage: ${0##*/} \"alert text\" phone" >&2
  exit 1
fi

TITLE=$1
CONTACT=$2
alert_date=$(date '+%y-%m-%d %H:%M')
cdkey=${SMS_CDKEY:-3RTY-EMY-0980-MTUQ2}
password=${SMS_PASSWORD:-189162}

# curl variant. --data-urlencode keeps spaces, "&" and "=" in the alert text
# from corrupting the form body.
curl -d "cdkey=${cdkey}" \
  -d "password=${password}" \
  -d "phone=${CONTACT}" \
  --data-urlencode "message=${TITLE}[${alert_date} oldboysa]" \
  http://sdkhttp.eucp.b2m.cn/sdkproxy/sendsms.action

#wget --quiet "http://s.ccme.cc/qxt/send.jsp?circle=159net_131&pwd=oldboy123&mobile=18911718229&service=f1fb0546-ebb6-0987-8f20-560524c1f88d&msgid=3956724&message=$TITLE[${alert_date} oldboysa n]"

一、添加聯繫人及聯繫組contacts.cfg

define contact{
        contact_name                    oldboy-pager
        use                             generic-contact
        alias                            Nagios users
        pager                            18901398229
}

二、添加報警的命令commands.cfg

#command.cfg

# 'notify-host-by-pager' command definition

define command{
        command_name    notify-host-by-pager
        command_line    $USER1$/sms_send "Host $HOSTSTATE$ alert for $HOSTNAME$"  $CONTACTPAGER$
        }
# 'notify-service-by-pager' command definition
define command{
        command_name    notify-service-by-pager
        command_line    $USER1$/sms_send "$HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$"  $CONTACTPAGER$
        }

三、調整聯繫人模板，添加報警的命令（來自於commands.cfg）

編輯templates.cfg裏的定義內容爲：

define contact{
        name                            generic-contact 
        service_notification_period     24x7
        host_notification_period        24x7                    
        service_notification_options    w,u,c,r,f,s            
        host_notification_options       d,u,r,f,s                       
        service_notification_commands   notify-service-by-email,notify-service-by-fetion,notify-service-by-msn, notify-service-by-pager
        host_notification_commands      notify-host-by-email,notify-host-by-fetion,notify-host-by-msn, notify-host-by-pager 
        register                        0                               
}

四、hosts.cfg,services.cfg添加報警聯繫人及組，或者對應模板加

contact_groups admins,groups1,groups2,user01

開發腳本批量生成Nagios配置文件實戰：

########################

批量創建nagios配置文件：

########################

好多兄弟們都說nagios配置麻煩，都來問過我如何管理，爲了提升解答效率，因此這裏統一總結下，供你們參考。呵呵。

這是之前的一個例子，拋磚引玉下，你能夠拓展作成菜單式等更詳細的配置,若是你足夠努力，能夠作到一鍵部署幾百

臺服務器的服務監控，是沒有問題的（包括host.cfg,service.cfg等），若是配置支持目錄就更簡單了。。歡迎隨時交流。

1.創建機器名和IP對應列表

# Write the host inventory: one "name ip" pair per line.
printf '%s\n' \
  'oldboyVM-001 192.168.1.1' \
  'oldboyVM-002 192.168.1.2' \
  'oldboyVM-003 192.168.1.3' \
  'oldboyVM-004 192.168.1.4' >host.list

#提示：上面列表能夠手工編輯或者腳本批量去取

2.批量生成 nagios host配置文件

# Generate Nagios "define host" blocks from a list of "host_name address" pairs.
# Usage: gen_nagios_hosts [list_file] [output_file]
#   list_file   - one whitespace-separated "host_name address" pair per line
#                 (default: host.list)
#   output_file - file that is (re)created with the definitions
#                 (default: new.host)
# Returns: 1 without touching output_file when list_file cannot be read.
gen_nagios_hosts() {
  local list_file=${1:-host.list}
  local out_file=${2:-new.host}
  local name addr _rest

  if [[ ! -r "$list_file" ]]; then
    printf 'gen_nagios_hosts: cannot read %s\n' "$list_file" >&2
    return 1
  fi

  # Redirect only this loop. "exec <file" would replace stdin for the whole
  # shell, which breaks an interactive session.
  while read -r name addr _rest || [[ -n "$name" ]]; do
    # Skip blank or incomplete lines instead of emitting empty definitions.
    [[ -n "$name" && -n "$addr" ]] || continue
    printf 'define host {\n'
    printf '        use                     linux-server\n'
    printf '        host_name               %s\n' "$name"
    printf '        alias                   %s\n' "$name"
    printf '        address                 %s\n' "$addr"
    printf '}\n'
  done <"$list_file" >"$out_file"
}

gen_nagios_hosts host.list new.host

3.最終host內容

$ cat new.host
define host {
        use                     linux-server
        host_name               oldboyVM-001
        alias                   oldboyVM-001
        address                 192.168.1.1
}
define host {
        use                     linux-server
        host_name               oldboyVM-002
        alias                   oldboyVM-002
        address                 192.168.1.2
}
define host {
        use                     linux-server
        host_name               oldboyVM-003
        alias                   oldboyVM-003
        address                 192.168.1.3
}
define host {
        use                     linux-server
        host_name               oldboyVM-004
        alias                   oldboyVM-004
        address                 192.168.1.4
}