#!/bin/sh
#
# About
# -----
# This script sends simple system-metrics to a remote graphite server.
#
#
# Metrics
# -------
# The metrics currently include the obvious things such as:
#
#   * System uptime.
#   * Percentage of disk-free for all mount-points.
#   * Swap In/Out status.
#   * Process-count and fork-count.
#   * NTP-skew.
#
# The metrics can easily be updated on a per-host basis via the inclusion
# of local scripts.
#
#
# Extensions
# ----------
# Any file matching the pattern /etc/graphite_send.d/* will be executed
# and the output will be used to add additional metrics to be sent.
#
# The shell-scripts will be assumed to output values such as:
#    metric.name1  33
#    metric.name2  value
#
# The hostname and the time-period will be added to the data before it
# is sent to the graphite host.
#
#
# Usage
# -----
# Configure the host/port of the graphite server in the file
# /etc/default/graphite_send:
#
#   echo HOST=1.2.3.4 >  /etc/default/graphite_send
#   echo PORT=2004    >> /etc/default/graphite_send
#
# Then merely run `graphite_send`, and the metrics will be sent.
#
# **NOTE**: Some metrics will only be sent if the invoking user is root.
#
# If you wish to see what is being sent run:
#
#    graphite_send -v
#
# If you just want to see which metrics would be sent, but not send any:
#
#    graphite_send -nv
#
#
# License
# -------
# Copyright (c) 2014 by Steve Kemp.  All rights reserved.
#
# This script is free software; you can redistribute it and/or modify it under
# the same terms as Perl itself.
#
# The LICENSE file contains the full text of the license.
#


#
# Setup a sane path
#
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
export PATH


#
# The default timeout period
#
TIMEOUT=5


#
# Read any options from the command-line.
#
while getopts hnvt: name
do
    case $name in
        v)
            VERBOSE=1
            ;;
        t)
            TIMEOUT=$OPTARG
            ;;
        n)
            NOP=1
            ;;
        h|?)
            exe=$(basename $0)
            cat <<-END >&2
USAGE: $exe [options]
$exe - Send data to a remote graphite server.

Valid options are:

 -h   Show help.
 -n   no-operation, don't actually send any data.  (Useful with -v.)
 -t N Set the timeout to N-seconds when invoking netcat. (Default: 5)
 -v   Be verbose, and show the data which is to be sent.
END
        exit 0
        ;;
    esac
done

#
#  If we have a configuration file then load it, if not then abort.
#
if [ -e /etc/default/graphite_send ]; then
    .  /etc/default/graphite_send
else
    echo "You must configure the remote host:port to send data to"
    echo "Please try:"
    echo " "
    echo "  echo HOST=1.2.3.4 > /etc/default/graphite_send"
    echo "  echo PORT=2003    >>/etc/default/graphite_send"
    exit 1
fi




###
##
## A simple function to send data to a remote host:port address.
##
###
send()
{
    if [ ! -z "$VERBOSE"  ]; then
        echo "Sending : $1"
    fi

    if [ ! -z "$NOP" ]; then
        return
    fi

    #
    # If we have nc then send the data, otherwise alert the user.
    #
    if ( which nc >/dev/null 2>/dev/null ); then
        echo $1 | nc $HOST $PORT --send-only -w "$TIMEOUT" -t "$TIMEOUT" 2>/dev/null
    else
        echo "nc (netcat) is not present.  Aborting"
    fi

}



###
##
##  Collect metrics, and send them.
##
##  We aim to be generic here, and allow missing files/tools to
## guide us on the metrics to send.
##
##  i.e. We shoudln't assume things are present because this might
## run on Debian, CentOS, etc.
##
####


#
# Get the current hostname, via $HOSTNAME, or `hostname`.
#
host=$HOSTNAME
if [ -z "$HOSTNAME" ]; then
    host=$(hostname --short)
fi

#
#  Make sure it is unqualified.
#
host=$(echo $host | awk -F. '{print $1}')


#
# Are we root?  Some metrics will care
#
root=0
if [ "$(id -u)" = "0" ]; then
    root=1
fi


#
# The current time - we want all metrics to be submitted at the same time.
#
time=$(date +%s)


##
## Fork-count
##
if [ -e /proc/stat ]; then
    forked=`awk '/processes/ {print $2}' /proc/stat`
    send "$host.process.forked $forked $time"
fi


##
## Process-count
##
if ( which ps >/dev/null 2>/dev/null ); then
    pcount=`ps -Al | wc -l`
    send "$host.process.count  $pcount $time"
fi


##
## The percentage of disk free on each mount-point
##
df --portability 2>/dev/null | grep ^/ |  while read i
do
   name=$(echo $i | awk '{print $1}' | awk -F/ '{print $NF}')
   perc=$(echo $i | awk '{print $5}' | tr -d \% )
   send "$host.mount.$name $perc $time"
done


##
## Swap
##
if ( which vmstat >/dev/null 2>/dev/null ); then
    INN=`vmstat -s | awk '/pages swapped in/ {print $1}'`
    OUT=`vmstat -s | awk '/pages swapped out/ {print $1}'`
    send "$host.swap.in  $INN $time"
    send "$host.swap.out $OUT $time"
fi


##
## Uptime.
##
if [ -e /proc/uptime ]; then
    uptime=$(awk -F\. '{print $1}'  /proc/uptime)
    send "$host.uptime $uptime $time"
fi


##
## Load average
##
if [ -e /proc/loadavg ]; then
    avg1=$(awk '{print $1}'  /proc/loadavg)
    send "$host.load $avg1 $time"
fi


##
## Number of login sessions
##
if ( which w >/dev/null 2>/dev/null ); then
    users=$(w -h | wc -l)
    send "$host.users $users $time"
fi



##
## For each interface transmit/receive in bytes
##
## This is nasty so we only send data if we're sure about it.
##
for i in $(/sbin/ifconfig  | grep encap. | awk '{print $1}'); do
    rx=$(/sbin/ifconfig $i | grep "RX bytes"  | awk '{print $2}' | awk -F: '{print $2}')
    tx=$(/sbin/ifconfig $i | grep "TX bytes"  | awk '{print $2}' | awk -F: '{print $2}')
    if [ ! -z "$rx" ]; then
        send "$host.net.$i.rx $rx $time"
    fi
    if [ ! -z "$tx" ]; then
        send "$host.net.$i.tx $tx $time"
    fi
done


##
##  If we have ntpdate installed look for the skew.
##
if ( which ntpdate >/dev/null 2>/dev/null); then

    if [ "$root" = "1" ]; then

        #
        #  We only run if  (mod($minute, 2) == 0)
        #
        #  This is to avoid being rate-limited by the remote NTP-server(s).
        #
        min=$(date +%M)
        mod=$(expr $min % 2)

        if [ "$mod" = "0" ]; then
            skew=$(ntpdate -q -u pool.ntp.org 2>/dev/null | grep adjust | awk '{print $(NF-1)}' | tr -d -)
            send "$host.ntp-skew $skew $time"
        fi
    fi
fi


##
##  If we have dpkg/apt-get then show package counts and pending
## upgrades.
##
if ( which dpkg >/dev/null 2>/dev/null ); then
    packages=$(dpkg --list 2>/dev/null | grep ^ii | wc -l)
    send "$host.packages.installed $packages $time"
fi
if ( which apt-get >/dev/null 2>/dev/null ); then
    if [ "$root"  = "1" ]; then
        pending=$(apt-get upgrade --simulate 2>/dev/null | grep ^Inst | wc -l )
        send "$host.packages.pending-updates $pending $time"
    fi
fi



###
##
##  Now we load per-host metrics
##
##
## Execute every script matching the pattern /etc/graphite_send.d/*
##
##  It is assumed the output will be:
##
##      host.metric.name1 233
##      host.metric.name2 233
##      host.metric.name1 233
##
##  We add on "$hostname", and we add on the time-period, then we submit
##
###
for i in /etc/graphite_send.d/* ; do

    if [ -x $i -a -e $i ]; then

        # run the script, capturing the output
        $i |  while read line
        do
            metric=$(echo $line | awk '{print $1}')
            value=$(echo $line | awk '{print $2}')

            send "$host.$metric $value $time"
        done

    fi
done
