#---config nagios server---#
添加nagios SysV init 腳本
chkconfig --add nagios
chkconfig --level 3 nagios on
測試nagios配置文件:
/fgn/theron/nagios/bin/nagios -v /fgn/theron/nagios/etc/nagios.cfg
配置nagios目錄屬組
chown -R nagios:nagcmd /fgn/theron/nagios/
啓動nagios
service nagios start
#---cfg files---#
objects(對象)是所有可被監控和通知的要素。
相關的配置文件主要包括:
hosts.cfg 定義被監控主機
hostgroups.cfg 定義被監控主機組
services.cfg 定義服務
servicegroups.cfg 定義服務組
contacts.cfg 定義聯繫人
contactgroups.cfg 定義聯繫人組
timeperiods.cfg 定義時間期限-如24x7全天候的監測
commands.cfg 定義命令
services.cfg 定義被監控進程
servicedependency 定義服務依賴
serviceescalation 定義服務升級(escalation,逐級上報通知)
hostdependency 定義主機依賴
hostescalation 定義主機升級(escalation,逐級上報通知)
cp -af /fgn/theron/nagios/etc/ /fgn/theron/nagios/etc.bak/
touch /fgn/theron/nagios/etc/objects/contactgroups.cfg
touch /fgn/theron/nagios/etc/objects/services.cfg
touch /fgn/theron/nagios/etc/objects/servicegroups.cfg
touch /fgn/theron/nagios/etc/objects/hosts.cfg
touch /fgn/theron/nagios/etc/objects/hostgroups.cfg
vim /fgn/theron/nagios/etc/nagios.cfg
修改cfg file爲
cfg_file=/fgn/theron/nagios/etc/objects/contacts.cfg
cfg_file=/fgn/theron/nagios/etc/objects/contactgroups.cfg
cfg_file=/fgn/theron/nagios/etc/objects/services.cfg
cfg_file=/fgn/theron/nagios/etc/objects/servicegroups.cfg
cfg_file=/fgn/theron/nagios/etc/objects/commands.cfg
cfg_file=/fgn/theron/nagios/etc/objects/timeperiods.cfg
cfg_file=/fgn/theron/nagios/etc/objects/templates.cfg
cfg_file=/fgn/theron/nagios/etc/objects/hosts.cfg
cfg_file=/fgn/theron/nagios/etc/objects/hostgroups.cfg
cfg_file=/fgn/theron/nagios/etc/objects/localhost.cfg
新增hosts
cat << EOF >> /fgn/theron/nagios/etc/objects/hosts.cfg
define host{
host_name 192.168.1.205
alias 192.168.1.205
address 192.168.1.205
max_check_attempts 5
#check_interval 1
#retry_interval 1
check_period 24x7
contact_groups sa_groups
notification_interval 30
#first_notification_delay #
notification_period 24x7
notification_options d,u,r
}
EOF
cat << EOF >> /fgn/theron/nagios/etc/objects/hosts.cfg
define host{
host_name 192.168.1.4
alias 192.168.1.4
address 192.168.1.4
max_check_attempts 5
#check_interval 1
#retry_interval 1
check_period 24x7
contact_groups sa_groups
notification_interval 30
#first_notification_delay #
notification_period 24x7
notification_options d,u,r
}
EOF
新增hostgroups
cat << EOF >> /fgn/theron/nagios/etc/objects/hostgroups.cfg
define hostgroup{
hostgroup_name all_hosts
alias all_hosts
members 192.168.1.4,192.168.1.205
#notes note_string
#notes_url url
#action_url url
}
define hostgroup{
hostgroup_name http_hosts
alias http_hosts
members 192.168.1.4
#notes note_string
#notes_url url
#action_url url
}
EOF
新增contacts
cat << EOF >> /fgn/theron/nagios/etc/objects/contacts.cfg
define contact{
contact_name cheng
alias sa_cheng
host_notifications_enabled 1 [0/1]
service_notifications_enabled 1 [0/1]
host_notification_period 24x7
service_notification_period 24x7
host_notification_options d,u,r
service_notification_options w,u,c,r
host_notification_commands notify-service-by-email,notify-service-by-sms
service_notification_commands notify-host-by-email,notify-host-by-sms
email admin@chengyongxu.com
pager 13712345678
can_submit_commands 1 [0/1]
#retain_status_information [0/1]
#retain_nonstatus_information [0/1]
}
EOF
新增contactgroups
cat << EOF >> /fgn/theron/nagios/etc/objects/contactgroups.cfg
define contactgroup{
contactgroup_name sa_groups
alias sa_groups
members cheng
#contactgroup_members contactgroups
}
EOF
#下邊檢查調用的命令(check_command),在命令配置文件中定義或在nrpe配置文件中要有定義
#最大重試次數(max_check_attempts),通常設置爲3-4次比較好,這樣不會由於太敏感而發生誤報,一丟包就發短信太崩潰了吧
#檢查間隔(check_interval)和重試檢查間隔(retry_interval)的單位是分鐘,不一樣的檢查項目酌情修改
#通知間隔(notification_interval)指探測到故障之後,每隔多少分鐘發送一次報警信息。
#狀態級別:
#d=send notifications on a DOWN state宕
#w=send notifications on a WARNING state警告狀態
#c=send notifications on a CRITICAL state嚴重狀態、臨界狀態
#u=send notifications on an UNREACHABLE or UNKNOWN state找不到、不可達
#r=send notifications on recoveries (OK state)OK狀態
#f=send notifications when the host or service starts and stops flapping
#s=send notifications when scheduled downtime starts and ends
新增services
cat << EOF >> /fgn/theron/nagios/etc/objects/services.cfg
#monitor hosts
define service{
host_name 192.168.1.4
service_description check_ftp
check_command check_ftp
max_check_attempts 3
check_interval 10
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
#monitor all_hosts
define service{
hostgroup_name all_hosts
service_description check_host-alive
check_command check_ping
max_check_attempts 5
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_df
check_command check_nrpe!check_df
max_check_attempts 4
check_interval 1440
retry_interval 5
check_period 24x7
notification_interval 1440
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_load
check_command check_nrpe!check_load
max_check_attempts 5
check_interval 5
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_zombie_procs
check_command check_nrpe!check_zombie_procs
max_check_attempts 5
check_interval 5
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_total_procs
check_command check_nrpe!check_total_procs
max_check_attempts 5
check_interval 5
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_ssh
check_command check_ssh
max_check_attempts 3
check_interval 60
retry_interval 5
check_period 24x7
notification_interval 60
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
#monitor http_hosts
define service{
hostgroup_name http_hosts
service_description check_http
check_command check_http
max_check_attempts 4
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
EOF
新增servicegroups
cat << EOF >> /fgn/theron/nagios/etc/objects/servicegroups.cfg
define servicegroup{
servicegroup_name Ping
alias Ping
members test1,check_http,test2,check_http,test3,check_http
}
EOF
新增 commands
cat << 'EOF' >> /fgn/theron/nagios/etc/objects/commands.cfg
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
EOF
定義完nrpe command,下面報錯消失
[root@www nagios]# ./bin/nagios -v ./etc/nagios.cfg
Nagios Core 3.2.1
Copyright (c) 2009-2010 Nagios Core Development Team and Community Contributors
Copyright (c) 1999-2009 Ethan Galstad
Last Modified: 03-09-2010
License: GPL
Website: http://www.nagios.org
Reading configuration data...
Read main config file okay...
Processing object config file '/fgn/theron/nagios/etc/objects/contacts.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/contactgroups.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/services.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/servicegroups.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/commands.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/timeperiods.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/templates.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/hosts.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/hostgroups.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/localhost.cfg'...
Read object config files okay...
Running pre-flight check on configuration data...
Checking services...
Error: Service check command 'check_nrpe' specified in service 'check_df' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_load' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_total_procs' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_zombie_procs' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_df' for host '192.168.1.4' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_load' for host '192.168.1.4' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_total_procs' for host '192.168.1.4' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_zombie_procs' for host '192.168.1.4' not defined anywhere!
Checked 22 services.
Checking hosts...
Checked 3 hosts.
Checking host groups...
Checked 3 host groups.
Checking service groups...
Checked 0 service groups.
Checking contacts...
Error: Service notification command 'notify-host-by-sms' specified for contact 'cheng' is not defined anywhere!
Error: Host notification command 'notify-service-by-sms' specified for contact 'cheng' is not defined anywhere!
Checked 2 contacts.
Checking contact groups...
Checked 2 contact groups.
Checking service escalations...
Checked 0 service escalations.
Checking service dependencies...
Checked 0 service dependencies.
Checking host escalations...
Checked 0 host escalations.
Checking host dependencies...
Checked 0 host dependencies.
Checking commands...
Checked 24 commands.
Checking time periods...
Checked 5 time periods.
Checking for circular paths between hosts...
Checking for circular host and service dependencies...
Checking global event handlers...
Checking obsessive compulsive processor commands...
Checking misc settings...
Total Warnings: 0
Total Errors: 10
***> One or more problems was encountered while running the pre-flight check...
Check your configuration file(s) to ensure that they contain valid
directives and data defintions. If you are upgrading from a previous
version of Nagios, you should be aware that some variables/definitions
may have been removed or modified in this version. Make sure to read
the HTML documentation regarding the config files, as well as the
'Whats New' section to find out what has changed.
#---第一次新增主機---#
vim hosts.cfg
define host{
use host-test-template
host_name test1
alias test1
address 192.168.1.205
process_perf_data 1
}
相應新增要使用:
主機模板host-test-template
vim templates.cfg
define host{
name host-test-template
register 0
contact_groups sa_groups
max_check_attempts 5
notification_interval 0
notification_period 24x7
notification_options d,u,r
check_command check-host-alive
}
新增聯繫組sa_groups
vim contactgroups.cfg
define contactgroup{
contactgroup_name sa_groups
alias sa_groups
members cheng
}
新增聯繫組成員:cheng
vim contacts.cfg
define contact{
contact_name cheng
alias sa_cheng
host_notifications_enabled 1 [0/1]
service_notifications_enabled 1 [0/1]
host_notification_period 24x7
service_notification_period 24x7
host_notification_options d,u,r
service_notification_options w,u,c,r
host_notification_commands notify-host-by-email
#,notify-host-by-sms
service_notification_commands notify-service-by-email
#,notify-service-by-sms
email admin@chengyongxu.com
pager 13712345678
can_submit_commands 1 [0/1]
#retain_status_information [0/1]
#retain_nonstatus_information [0/1]
}
新增命令:check-host-alive,notify-host-by-email,notify-service-by-email(默認就有,此步驟可省略)
vim commands.cfg
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}
define command{
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
define command{
command_name notify-service-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}
給全部nrpe客戶端使用:
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 20
}
#---之後新增主機---#
只需新增主機信息
vim hosts.cfg
define host{
use host-test-template
host_name test1
alias test1
address 192.168.1.205
process_perf_data 1
}
#---新增host group---#
define hostgroup{
hostgroup_name test_a_group
alias test_a_group
members test1,test2
#notes note_string
#notes_url url
#action_url url
}
相應新增service
vim services.cfg
define service{
hostgroup_name test_a_group
service_description check-host-alive
check_command check_ping
max_check_attempts 5
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
#---新增servicegroup---#
define servicegroup{
servicegroup_name Ping
alias Ping
members test1,check_http,test2,check_http,test3,check_http
}
#---新增腳本工具check_ips---#
相應新增service
define service{
hostgroup_name test_a_group,test_b_group,test_c_group
service_description check_ip_link
check_command check_nrpe!check_ips
max_check_attempts 5
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
contact_groups sa_groups
}
相應配置nrpe客戶端:
command[check_ips]=/fgn/theron/nagios/libexec/ip_conn.sh