Monitoring Resources with Nagios

Overview

An overview is available in the Nagios built-in documentation or on the Nagios web site: Nagios 3.x Documentation

Most plugins can be downloaded from Nagios Exchange

For more information on nagios distributed monitoring see Nagios 3.0 Distributed Monitoring

Setup

Server

Central Server Setup

The central server will collect passive check results from the distributed servers. It may also perform active checks on its own. It is installed as a regular stand-alone server.

1. Let’s use an example where we install nagios in /usr/local/nagios on a computer called server. It will later be accessible at http://server/nagios/ (username nagiosadmin)

# groupadd nagios
# useradd -g nagios -d /export/home/nagios -m nagios
# groupadd nagcmd
# usermod -G nagcmd nagios
# usermod -G nagcmd nobody

$ cd ~
$ gunzip nagios-3.0.3.tar.gz
$ tar -xf nagios-3.0.3.tar
$ cd /export/home/arkadi/nagios-3.0.3
$ ./configure --prefix=/usr/local/nagios-3.0.3 --with-command-group=nagcmd \
              --with-gd-lib=/opt/csw/lib --with-gd-inc=/opt/csw/include \
              --with-httpd-conf=/opt/csw/apache2/etc/extra
$ make all

# make install
# make install-init
# make install-config
# make install-commandmode
# make install-webconf

# ln -s /usr/local/nagios-3.0.3 /usr/local/nagios

# vi /opt/csw/apache2/etc/httpd.conf
 # Nagios virtual host
 Include etc/extra/nagios.conf

# htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
# apachectl restart

2. Install Nagios plugins

$ cd ~
$ gunzip nagios-plugins-1.4.12.tar.gz
$ tar -xf nagios-plugins-1.4.12.tar
$ cd nagios-plugins-1.4.12
$ ./configure --with-nagios-user=nagios --with-nagios-group=nagios
$ make

# make install

3. Configure nagios as a central server

# sudo -E -H -i -u nagios
$ cd /usr/local/nagios
$ vi etc/nagios.cfg
 cfg_dir=/usr/local/nagios/etc/servers
 enable_notifications=1
 execute_service_checks=1
 check_external_commands=1
 accept_passive_service_checks=1
 accept_passive_host_checks=1
 translate_passive_host_checks=1
 obsess_over_services=0
 #ocsp_command=
 check_service_freshness=1
 service_freshness_check_interval=60
 check_host_freshness=1
 host_freshness_check_interval=60
 
$ mkdir /usr/local/nagios/etc/servers

4. Define contacts and contact groups

# vi etc/objects/contacts.cfg
define contact{
        contact_name            nagiosadmin
        use                     generic-contact
        alias                   Nagios Admin
        email                   sysadm@betline.ru
        }

define contactgroup{
        contactgroup_name       admins
        alias                   Nagios Administrators
        members                 nagiosadmin
        }

5. Define a template for remote host and service passive checks

$ vi etc/objects/templates.cfg

define host{
        name                            remote-host
        use                             generic-host
        obsess_over_host                0
        check_freshness                 1
        freshness_threshold             3600
        max_check_attempts              1
        check_command                   host-is-stale
        notifications_enabled           1
        event_handler_enabled           1
        flap_detection_enabled          1
        failure_prediction_enabled      1
        process_perf_data               0
        retain_status_information       1
        retain_nonstatus_information    0
        check_period                    24x7
        check_interval                  15
        retry_interval                  5
        contact_groups                  admins
        notification_options            d,u,r
        notification_interval           60
        notification_period             24x7
        register                        0
        }


define service{
        name                            remote-service
        active_checks_enabled           0
        passive_checks_enabled          1
        obsess_over_service             0
        check_freshness                 1
        freshness_threshold             3600
        max_check_attempts              1
        check_command                   service-is-stale
        notifications_enabled           1
        event_handler_enabled           1
        flap_detection_enabled          1
        failure_prediction_enabled      1
        process_perf_data               0
        retain_status_information       1
        retain_nonstatus_information    0
        is_volatile                     0
        check_period                    24x7
        check_interval                  15
        retry_interval                  5
        contact_groups                  admins
        notification_options            w,u,c,r
        notification_interval           60
        notification_period             24x7
        register                        0
        }

6. Define service groups

# vi etc/servers/service-groups.cfg
define servicegroup{
        servicegroup_name       Processes
        alias                   Processes that must run on a remote host
        servicegroup_members    NFS,NTP,POOL
        }

define servicegroup{
        servicegroup_name       LOAD
        alias                   System load
        }

define servicegroup{
        servicegroup_name       CPU
        alias                   Percentage of CPU usage
        }

define servicegroup{
        servicegroup_name       SSH
        alias                   SSH daemons
        }

define servicegroup{
        servicegroup_name       NFS
        alias                   NFS daemons
        }

define servicegroup{
        servicegroup_name       NTP
        alias                   NTP daemons
        }

define servicegroup{
        servicegroup_name       POOL
        alias                   Solaris Pool daemons
        }

7. Define host groups

$ vi etc/servers/host-groups.cfg
 define hostgroup{
        hostgroup_name  solaris
        alias           Solaris Servers
        members         localhost, gamesys08
        }

8. Define hosts

$ vi etc/servers/gamesys08.cfg
define host{
        use                     generic-server            
        host_name               gamesys08
        alias                   Curacao IPFilter Firewall
        address                 200.6.63.16
#       parents
        active_checks_enabled   1
        passive_checks_enabled  1
        notes                   SUN T1000, Solaris 10u5
        notes_url               http://trac.hibet.ru:8000/project/wiki/gamesys08
        }
define service{
        use                             generic-service
        host_name                       gamesys08
        service_description             PING
        check_command                   check_ping!500.0,20%!3000.0,60%
        }
define service{
        use                             generic-service
        host_name                       gamesys08
        service_description             SSH
        check_command                   check_ssh
        notifications_enabled           1
        }

define service{
        use                             remote-service
        host_name                       gamesys08
        service_description             DISK
        }

define service{
        use                             remote-service
        host_name                       gamesys08
        service_description             CPU
        }

etc, carry on with things like SNMP_PROC, SENDMAIL_PROC, IPMON_PROC, NTP_PROC, SYSLOG_PROC, USERS, etc

9. Define check commands

$ vi etc/objects/commands.cfg

define command{
        command_name    check_http_vhost
        command_line    $USER1$/check_http -H $ARG1$ -u $ARG2$
        }

define command{
        command_name    check_http_vhost_auth
        command_line    $USER1$/check_http -H $ARG1$ -u $ARG2$ -a $ARG3$
        }

define command{
        command_name check_dummy
        command_line $USER1$/check_dummy $ARG1$
}

define command{
        command_name service-is-stale
        command_line $USER1$/check_dummy 2 "Service passive check results are stale"
}

define command{
        command_name host-is-stale
        command_line $USER1$/check_dummy 2 "Host passive check results are stale"
}


10. Define resources

$ vi etc/resource.cfg
 # User nagiosadmin and its password
 $USER5$=nagiosadmin:******

11. Check the configuration

$ /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

or with

# /etc/init.d/nagios checkconfig

12. Start nagios

# /etc/init.d/nagios start

13. Install Nagios plugins

$ cd ~
$ gunzip nagios-plugins-1.4.12.tar.gz
$ tar -xf nagios-plugins-1.4.12.tar
$ cd nagios-plugins-1.4.12
$ ./configure --with-nagios-user=nagios --with-nagios-group=nagios
$ make

# make install

14. Install nagios addon NSCA for processing the results of remote check commands on the nagios (central) server and run it via inetd SMF

$ cd ~
$ gunzip nsca-2.7.2.tar.gz
$ tar -xf nsca-2.7.2.tar
$ cd nsca-2.7.2
$ ./configure
$ make all

# cp src/nsca /usr/local/nagios/bin
# cp sample_config/nsca.cfg /usr/local/nagios/etc
# vi /usr/local/nagios/etc/nsca.cfg
 password=**********
 decryption_method=3
# vi /etc/inet/inetd.conf
 nsca    stream  tcp6    nowait  nagios  /usr/local/nagios/bin/nsca nsca -c /usr/local/nagios/etc/nsca.cfg --inetd
# inetconv -o /var/tmp
# svccfg import /var/tmp/nsca-tcp6.xml
# inetadm|grep nsca
 enabled   online         svc:/network/nsca/tcp6:default

15. For NSCA debugging it makes sense to start the nsca process as a stand-alone daemon with debug option set to 1 in nsca.cfg. The log info can be found in /var/adm/messages or whatever is set in /etc/syslog.conf

# vi /usr/local/nagios/etc/nsca.cfg
 debug=1
# inetadm -d svc:/network/nsca/tcp6:default
# /usr/local/nagios/bin/nsca -c /usr/local/nagios/etc/nsca.cfg --daemon

Distributed Nagios Servers

The distributed Nagios server will do active checks on the local clients and submit the results to the central server. It is installed as a bare-minimum nagios server, without the web interface and with notifications turned off. It also requires the client part of the NSCA add-on to be installed (two files: send_nsca and send_nsca.cfg). See the Clients section for the NSCA install.

# groupadd nagios
# useradd -g nagios -d /export/home/nagios -m nagios

$ cd ~
$ gunzip nagios-3.0.3.tar.gz
$ tar -xf nagios-3.0.3.tar
$ cd /export/home/arkadi/nagios-3.0.3
$ ./configure --prefix=/usr/local/nagios-3.0.3 --with-command-group=nagcmd
$ make all

# make install
# make install-init
# make install-config

# ln -s /usr/local/nagios-3.0.3 /usr/local/nagios
# cd /usr/local/nagios
# vi etc/nagios.cfg
 cfg_dir=/usr/local/nagios/etc/servers
 enable_notifications=0
 obsess_over_services=1
 ocsp_command=submit_service-check_result
 obsess_over_hosts=1
 ochp_command=submit_host-check_result

# vi etc/objects/commands.cfg
 define command{
        command_name    submit_service-check_result
        command_line    /usr/local/nagios/libexec/eventhandlers/submit_service-check_result $HOSTNAME$ '$SERVICEDESC$' $SERVICESTATEID$ '$SERVICEOUTPUT$'
        }
 define command{
        command_name    submit_host-check_result
        command_line    /usr/local/nagios/libexec/eventhandlers/submit_host-check_result $HOSTNAME$ $HOSTSTATEID$ '$HOSTOUTPUT$'
        }

# vi libexec/eventhandlers/submit_service-check_result

#!/bin/sh
#
# submit_service-check_result
#
# Forwards one service check result to the central monitoring server by
# piping a single tab-separated record into send_nsca.
#
# Arguments:
#  $1 = host_name (short name of the host the service is associated with)
#  $2 = svc_description (description of the service)
#  $3 = state_id (numeric service state; the command definition passes
#       $SERVICESTATEID$ here, and the value is forwarded unchanged as
#       the return code field of the record)
#  $4 = plugin_output (text to use as the plugin output for the
#       service check)

CENTRAL_SERVER=sparc5
NSCA_CONFIG=/usr/local/nagios/etc/send_nsca.cfg
NSCA_COMMAND=/usr/local/nagios/bin/send_nsca

# Record layout: host<TAB>service<TAB>return code<TAB>output<NEWLINE>.
# Every expansion is quoted so service descriptions and plugin output
# containing spaces or glob characters reach send_nsca untouched.
/bin/printf "%s\t%s\t%s\t%s\n" "$1" "$2" "$3" "$4" |
        "$NSCA_COMMAND" -H "$CENTRAL_SERVER" -c "$NSCA_CONFIG"

# vi libexec/eventhandlers/submit_host-check_result

#!/bin/sh
#
# submit_host-check_result
#
# Forwards one host check result to the central monitoring server by
# piping a single tab-separated record into send_nsca.
#
# Arguments:
#  $1 = host_name (short name of the host that was checked)
#  $2 = state_id (numeric host state; the command definition passes
#       $HOSTSTATEID$ here, and the value is forwarded unchanged as
#       the return code field of the record)
#  $3 = plugin_output (text to use as the plugin output for the
#       host check)

CENTRAL_SERVER=sparc5
NSCA_CONFIG=/usr/local/nagios/etc/send_nsca.cfg
NSCA_COMMAND=/usr/local/nagios/bin/send_nsca

# A host record has exactly three fields:
# host<TAB>return code<TAB>output<NEWLINE>. The format string therefore
# carries three %s conversions, one per argument, so no empty trailing
# field is emitted.
/bin/printf "%s\t%s\t%s\n" "$1" "$2" "$3" |
        "$NSCA_COMMAND" -H "$CENTRAL_SERVER" -c "$NSCA_CONFIG"

# chmod 755 libexec/eventhandlers/submit_service-check_result
# chmod 755 libexec/eventhandlers/submit_host-check_result

# crle -u -l /opt/csw/lib

# /etc/init.d/nagios start

Configure hosts and services as above, skipping the service, host and contact groups and contacts. The central server must have corresponding services defined in its config files to accept the results of passive checks.

Clients

1. Let’s now install nagios plug-ins and NSCA client on a remote machine

$ cd ~
$ wget <url>nagios-plugins-1.4.13.tar.gz
$ gunzip nagios-plugins-1.4.13.tar.gz
$ tar -xf nagios-plugins-1.4.13.tar
$ cd nagios-plugins-1.4.13
$ ./configure --prefix=/usr/local/nagios-1.4.13  --with-nagios-user=nagios --with-nagios-group=nagios  --without-mysql
$ make
$ sudo -s -E

# groupadd nagios
# useradd -g nagios -d /export/home/nagios -m nagios
# make install
# ln -s /usr/local/nagios-1.4.13 /usr/local/nagios
# vi /etc/profile
 PATH=$PATH:/usr/local/nagios/libexec:/usr/local/nagios/bin
# mkdir /usr/local/nagios/bin
# mkdir /usr/local/nagios/etc

$ gunzip nsca-2.7.2.tar.gz
$ tar -xf nsca-2.7.2.tar
$ cd nsca-2.7.2
$ ./configure
$ make all

# cp src/send_nsca /usr/local/nagios/bin
# cp sample_config/send_nsca.cfg /usr/local/nagios/etc
# vi /usr/local/nagios/etc/send_nsca.cfg
 password=**********
 encryption_method=3

# chown nagios /usr/local/nagios/etc/send_nsca.cfg
# chmod 400 /usr/local/nagios/etc/send_nsca.cfg

2. Let’s create some basic checks

# cd /usr/local/nagios/bin
# vi disk_check
 #!/usr/bin/bash

# disk_check: run check_disk locally and submit the result to the
# central server as a passive check for the DISK service.

HOSTNAME=$(hostname)
SERVERNAME=sparc5
SERVICE_NAME=DISK
CHECK_COMMAND=/usr/local/nagios/libexec/check_disk
# Plugin options are kept in an array so each one is passed as its own word.
COMMAND_ARGUMENTS=(-w 15% -c 10% -W 15% -K 10% -u GB -p / -p /usr -p /tmp -p /var -p /opt -p /export/home)
SEND_NSCA_COMMAND=/usr/local/nagios/bin/send_nsca
SEND_NSCA_CONFIG=/usr/local/nagios/etc/send_nsca.cfg

# Capture the plugin's text and its exit status (used as the return code field).
OUTPUT=$("${CHECK_COMMAND}" "${COMMAND_ARGUMENTS[@]}")
RESULT=$?

# One record per line is piped to send_nsca, so fold any newlines in the
# plugin output into spaces.
OUTPUT=${OUTPUT//$'\n'/ }

# Record layout: host<TAB>service<TAB>return code<TAB>output<NEWLINE>.
# printf is used so the tabs do not depend on how echo treats backslashes.
printf '%s\t%s\t%s\t%s\n' "${HOSTNAME}" "${SERVICE_NAME}" "${RESULT}" "${OUTPUT}" \
  | "${SEND_NSCA_COMMAND}" -H "${SERVERNAME}" -c "${SEND_NSCA_CONFIG}"

# vi cpu_check
 #!/usr/bin/bash

# cpu_check: run check_procs with the CPU metric locally and submit the
# result to the central server as a passive check for the CPU service.

HOSTNAME=$(hostname)
SERVERNAME=sparc5
SERVICE_NAME=CPU
CHECK_COMMAND=/usr/local/nagios/libexec/check_procs
# Plugin options are kept in an array so each one is passed as its own word.
COMMAND_ARGUMENTS=(-w 50 -c 80 -m CPU)
SEND_NSCA_COMMAND=/usr/local/nagios/bin/send_nsca
SEND_NSCA_CONFIG=/usr/local/nagios/etc/send_nsca.cfg

# Capture the plugin's text and its exit status (used as the return code field).
OUTPUT=$("${CHECK_COMMAND}" "${COMMAND_ARGUMENTS[@]}")
RESULT=$?

# One record per line is piped to send_nsca, so fold any newlines in the
# plugin output into spaces.
OUTPUT=${OUTPUT//$'\n'/ }

# Record layout: host<TAB>service<TAB>return code<TAB>output<NEWLINE>.
# printf is used so the tabs do not depend on how echo treats backslashes.
printf '%s\t%s\t%s\t%s\n' "${HOSTNAME}" "${SERVICE_NAME}" "${RESULT}" "${OUTPUT}" \
  | "${SEND_NSCA_COMMAND}" -H "${SERVERNAME}" -c "${SEND_NSCA_CONFIG}"

# vi sshd_check
 #!/usr/bin/bash

# Counts running sshd processes with check_procs and submits the result
# to the central server as a passive check for the SSH_PROC service.

HOSTNAME=$(hostname)
SERVERNAME=sparc5
SERVICE_NAME=SSH_PROC
CHECK_COMMAND=/usr/local/nagios/libexec/check_procs
# Plugin options are kept in an array so each one is passed as its own word.
COMMAND_ARGUMENTS=(-w 1:50 -c 1:100 -m PROCS -C sshd)
SEND_NSCA_COMMAND=/usr/local/nagios/bin/send_nsca
SEND_NSCA_CONFIG=/usr/local/nagios/etc/send_nsca.cfg

# Capture the plugin's text and its exit status (used as the return code field).
OUTPUT=$("${CHECK_COMMAND}" "${COMMAND_ARGUMENTS[@]}")
RESULT=$?

# One record per line is piped to send_nsca, so fold any newlines in the
# plugin output into spaces.
OUTPUT=${OUTPUT//$'\n'/ }

# Record layout: host<TAB>service<TAB>return code<TAB>output<NEWLINE>.
# printf is used so the tabs do not depend on how echo treats backslashes.
printf '%s\t%s\t%s\t%s\n' "${HOSTNAME}" "${SERVICE_NAME}" "${RESULT}" "${OUTPUT}" \
  | "${SEND_NSCA_COMMAND}" -H "${SERVERNAME}" -c "${SEND_NSCA_CONFIG}"

# vi snmpd_check
 #!/usr/bin/bash

# snmpd_check: count running snmpd processes with check_procs and submit
# the result to the central server as a passive check for SNMP_PROC.

HOSTNAME=$(hostname)
SERVERNAME=sparc5
SERVICE_NAME=SNMP_PROC
CHECK_COMMAND=/usr/local/nagios/libexec/check_procs
# Plugin options are kept in an array so each one is passed as its own word.
COMMAND_ARGUMENTS=(-w 1:1 -c 1:1 -m PROCS -C snmpd)
SEND_NSCA_COMMAND=/usr/local/nagios/bin/send_nsca
SEND_NSCA_CONFIG=/usr/local/nagios/etc/send_nsca.cfg

# Capture the plugin's text and its exit status (used as the return code field).
OUTPUT=$("${CHECK_COMMAND}" "${COMMAND_ARGUMENTS[@]}")
RESULT=$?

# One record per line is piped to send_nsca, so fold any newlines in the
# plugin output into spaces.
OUTPUT=${OUTPUT//$'\n'/ }

# Record layout: host<TAB>service<TAB>return code<TAB>output<NEWLINE>.
# printf is used so the tabs do not depend on how echo treats backslashes.
printf '%s\t%s\t%s\t%s\n' "${HOSTNAME}" "${SERVICE_NAME}" "${RESULT}" "${OUTPUT}" \
  | "${SEND_NSCA_COMMAND}" -H "${SERVERNAME}" -c "${SEND_NSCA_CONFIG}"

etc... carry on with things like SENDMAIL_PROC, IPMON_PROC, NTP_PROC, SYSLOG_PROC, USERS, etc

# vi check.sh
 #!/usr/bin/bash

# Directory that holds the per-service "<name>_check" wrapper scripts.
CHECK_PATH=/usr/local/nagios/bin
# Space-separated wrapper name prefixes; the list is deliberately left
# unquoted in the loop below so it splits into one word per service.
SERVICES="cpu disk sshd snmpd sendmail ipmon xntpd syslogd user"

# Run every wrapper in turn, in the order listed.
for svc in ${SERVICES}
do
  "${CHECK_PATH}/${svc}_check"
done


# crontab -e
 0,15,30,45 * * * * /usr/local/nagios/bin/check.sh

3. Let’s create our own plugin in /usr/local/nagios/libexec for checking the environmental parameters on some sparc machines. My other plug-ins

# vi libexec/check_env

#!/usr/bin/bash
#
# check_env - plugin that reports the environmental status of the machine
# based on the exit status of /usr/sbin/prtdiag.
#
# Usage: check_env          (takes no arguments)
#
# Exit status / message, by prtdiag exit status:
#   0     -> exit 0, "SUCCESS: ..."
#   1     -> exit 1, "FAILURE: Environmental failure ..."
#   2     -> exit 2, "FAILURE: prtdiag returned an internal error"
#   other -> exit 3, "UNKNOWN: ..." (for example 127 if prtdiag is missing)

# Arithmetic comparison: any argument at all is a usage error.
if (( $# > 0 )); then
  echo "Usage: check_env"
  exit 2
fi

status=0

# Only the exit status matters here; the report itself is discarded.
/usr/sbin/prtdiag >/dev/null
exit_status=$?
case "$exit_status" in
  0)
    echo "SUCCESS: Temperature, fans, voltage, currents and indicators are OK"
    ;;
  1)
    echo "FAILURE: Environmental failure - check the output of prtdiag -v"
    status=1
    ;;
  2)
    echo "FAILURE: prtdiag returned an internal error"
    status=2
    ;;
  *)
    # Without this arm an unexpected status would be reported as OK
    # with no output at all.
    echo "UNKNOWN: prtdiag exited with unexpected status $exit_status"
    status=3
    ;;
esac

exit "$status"

# vi bin/env_check

#!/usr/bin/bash
#
# env_check: run the check_env plugin locally and submit the result to
# the central server as a passive check for the ENV service.

HOSTNAME=$(hostname)
SERVERNAME=sparc5
SERVICE_NAME=ENV
CHECK_COMMAND=/usr/local/nagios/libexec/check_env
# check_env takes no arguments; the empty array expands to no words.
COMMAND_ARGUMENTS=()
SEND_NSCA_COMMAND=/usr/local/nagios/bin/send_nsca
SEND_NSCA_CONFIG=/usr/local/nagios/etc/send_nsca.cfg

# Capture the plugin's text and its exit status (used as the return code field).
OUTPUT=$("${CHECK_COMMAND}" "${COMMAND_ARGUMENTS[@]}")
RESULT=$?

# One record per line is piped to send_nsca, so fold any newlines in the
# plugin output into spaces.
OUTPUT=${OUTPUT//$'\n'/ }

# Record layout: host<TAB>service<TAB>return code<TAB>output<NEWLINE>.
# printf is used so the tabs do not depend on how echo treats backslashes.
printf '%s\t%s\t%s\t%s\n' "${HOSTNAME}" "${SERVICE_NAME}" "${RESULT}" "${OUTPUT}" \
  | "${SEND_NSCA_COMMAND}" -H "${SERVERNAME}" -c "${SEND_NSCA_CONFIG}"

# vi check.sh
 #!/usr/bin/bash

# Directory that holds the per-service "<name>_check" wrapper scripts.
CHECK_PATH=/usr/local/nagios/bin
# Space-separated wrapper name prefixes; the list is deliberately left
# unquoted in the loop below so it splits into one word per service.
SERVICES="cpu disk env sshd snmpd sendmail ipmon xntpd syslogd user"

# Run every wrapper in turn, in the order listed.
for svc in ${SERVICES}
do
  "${CHECK_PATH}/${svc}_check"
done

4. To install plugins on other machines with the same architecture and OS, just tar the files in /usr/local/nagios, copy the archive and extract them into /usr/local; for example,

# cd /usr/local
# tar -cf nagios.tar nagios
# scp nagios.tar <another_client>:~
<another_client> # cd /usr/local
<another_client> # tar -xf ~/nagios.tar

Modify files in /usr/local/nagios/bin (and the service definitions on the server) as necessary. For the central server to accept the results of the host or service passive checks, the host and/or the service must be defined in its config files. See the configuration of the central server above.