[zfs-discuss] Nagios monitor

Andrew Hamilton ahamilto at tjhsst.edu
Wed Sep 4 19:48:40 EDT 2013


I've attached two scripts that I use in Nagios. One monitors overall 
pool state and one monitors the individual filesystems.

Andrew Hamilton

On 9/4/2013 7:00 PM, Brian Menges wrote:
>
> Here's something that we use... very rough but if you added some 
> command line parsing (which I have in my TODO list) using like optarg, 
> then it would be very pretty!
>
> ----
>
> #!/usr/bin/env python
>
> #
>
> # This is a nagios plugin for monitoring
>
> # ZFS disk space. -russ 7/9/2013
>
> # Modified by brian 8/29/2013 to improve runtime
>
> #
>
> # Changelog:
>
> # 2013-07-09   Russ    Original script
>
> # 2013-08-29   Brian   Converted over to subprocess from os.popen 
> which is deprecated
>
> # Performance improvement (runtime down from 0m0.153s to 0m0.049s)
>
> # Using one normalize function to pretty print the large numbers in 
> meaningful units
>
> # Printing floats to the second decimal place
>
> # Some comments added to the file
>
> # Aligned to 2-space formatting
>
> #
>
> from __future__ import division
>
> import sys
>
> import subprocess
>
> def normalize(vol_size):
>
>   # Print human readable byte units
>
>   # Bytes      < (2 ** 10) or 1024
>
>   # Kilobytes  < (2 ** 20) or 1024*1024
>
>   # Megabytes  < (2 ** 30) or 1024*1024*1024
>
>   # Gigabytes  < (2 ** 40) or 1024*1024*1024*1024
>
>   # Terabytes  < (2 ** 50) or 1024*1024*1024*1024*1024
>
>   # Petabytes  < (2 ** 60) or 1024*1024*1024*1024*1024*1024
>
>   # Exabytes   < (2 ** 70) or 1024*1024*1024*1024*1024*1024*1024
>
>   # Zettabytes < (2 ** 80) or 1024*1024*1024*1024*1024*1024*1024*1024
>
>   # Yottabytes < (2 ** 90) or 1024*1024*1024*1024*1024*1024*1024*1024*1024
>
>   # Bigger = NaN (this function's version of an error... cause we 
> can't stuff Brontobytes in)
>
>   if vol_size < (2 ** 10):
>
> value = vol_size
>
> unit  = ''
>
>   elif vol_size < (2 ** 20):
>
> value = vol_size / (2 ** 10)
>
> unit  = 'K'
>
>   elif vol_size < (2 ** 30):
>
> value = vol_size / (2 ** 20)
>
> unit  = 'M'
>
>   elif vol_size < (2 ** 40):
>
> value = vol_size / (2 ** 30)
>
> unit  = 'G'
>
>   elif vol_size < (2 ** 50):
>
> value = vol_size / (2 ** 40)
>
> unit  = 'T'
>
>   elif vol_size < (2 ** 60):
>
> value = vol_size / (2 ** 50)
>
> unit  = 'P'
>
>   elif vol_size < (2 ** 70):
>
> value = vol_size / (2 ** 60)
>
> unit  = 'E'
>
>   elif vol_size < (2 ** 80):
>
> value = vol_size / (2 ** 70)
>
> unit  = 'Z'
>
>   elif vol_size >= (2 ** 80):
>
> value = vol_size / (2 ** 80)
>
> unit  = 'Y'
>
>   else:
>
> value = vol_size
>
> unit  = 'NaN'
>
>   unit = unit + 'B'
>
>   return (value, unit)
>
> # percent values to warn / crit at
>
> # TODO: might want to make this a command line option such as -w 
> <warn> -c <crit>
>
> warn_at = 70
>
> crit_at = 80
>
> if len (sys.argv) < 2:
>
>   print 'CRITICAL: You must provide a Zhast volume name!'
>
> sys.exit(2)
>
> # TODO: Remove harvest of sys.argv and instead use an argument 
> parser, and require zvol argument and others
>
> zfs_vol = sys.argv[1]
>
> # "zfs get -Hp used <zfs_vol>" is a better command to get the used 
> space in bytes
>
> zfs_cmd = 'zfs get -Hp used \'%s\'' % zfs_vol
>
> p = subprocess.check_output(zfs_cmd, shell=True)
>
> zfs_used = p.rstrip().split()
>
> zfs_used = int(zfs_used[2])
>
> # "zfs get -Hp available <zfs_vol>" is a better command to get the 
> available space in bytes
>
> zfs_cmd = 'zfs get -Hp available \'%s\'' % zfs_vol
>
> p = subprocess.check_output(zfs_cmd, shell=True)
>
> zfs_avail = p.rstrip().split()
>
> zfs_avail = int(zfs_avail[2])
>
> # Lets do a bit of math, and get normalized output strings
>
> zfs_total = zfs_avail + zfs_used
>
> zfs_percent = float((zfs_used/zfs_total)*100)
>
> zfs_used = "%.2f %s" % normalize(zfs_used)
>
> zfs_total = "%.2f %s" % normalize(zfs_total)
>
> msg = 'Usage on ' +  '%s is %.2f%% (%s / %s)' % (zfs_vol, zfs_percent, 
> zfs_used, zfs_total)
>
> if zfs_percent >= crit_at:
>
>   print 'CRITICAL: %s' % msg
>
> sys.exit(2)
>
> elif zfs_percent >= warn_at:
>
>   print 'WARNING: %s' % msg
>
> sys.exit(1)
>
> else:
>
>   print 'OK: %s' % msg
>
> sys.exit(0)
>
> ----
>
> - Brian Menges
>
> Principal Engineer, DevOps
>
> GoGrid | ServePath | ColoServe | UpStream Networks
>
> *From:*Eri Ramos Bastos [mailto:bastos.eri at gmail.com]
> *Sent:* Wednesday, September 04, 2013 13:00
> *To:* zfs-discuss at zfsonlinux.org
> *Subject:* [zfs-discuss] Nagios monitor
>
> Hi, all
>
> Does anyone have a Nagios monitor for their zpools or zfs filesystems?
>
> We are looking to implement space utilization monitoring but could not 
> find anything useful, but before I start re-inventing the wheel I 
> thought I should ask here.
>
> Thanks!
>
>
> Eri Ramos Bastos
>
> To unsubscribe from this group and stop receiving emails from it, send 
> an email to zfs-discuss+unsubscribe at zfsonlinux.org 
> <mailto:zfs-discuss+unsubscribe at zfsonlinux.org>.
>
>
> ------------------------------------------------------------------------
>
> The information contained in this message, and any attachments, may 
> contain confidential and legally privileged material. It is solely for 
> the use of the person or entity to which it is addressed. Any review, 
> retransmission, dissemination, or action taken in reliance upon this 
> information by persons or entities other than the intended recipient 
> is prohibited. If you receive this in error, please contact the sender 
> and delete the material from any computer.
> To unsubscribe from this group and stop receiving emails from it, send 
> an email to zfs-discuss+unsubscribe at zfsonlinux.org.

To unsubscribe from this group and stop receiving emails from it, send an email to zfs-discuss+unsubscribe at zfsonlinux.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://list.zfsonlinux.org/pipermail/zfs-discuss/attachments/20130904/e5164853/attachment.html>
-------------- next part --------------
#!/usr/bin/python

from subprocess import *

# Nagios plugin exit codes.
OK_STATE = 0
WARNING_STATE = 1
CRITICAL_STATE = 2

# Percent-used thresholds at which a filesystem raises an alert.
ZFS_USE_WARNING = 90
ZFS_USE_CRITICAL = 95
DISKSPACECMD = "sudo /bin/df -hP --type=zfs"


def parse_df_output(text):
    """Parse the combined stdout/stderr of ``df -hP`` into usable records.

    Lines are classified by content rather than by position, because the
    command is run with stderr merged into stdout and error lines may
    therefore appear before the header line.

    Args:
        text: Everything the df command printed, as one string.

    Returns:
        A ``(filesystems, problems)`` tuple.  ``filesystems`` maps each
        filesystem name to a dict with the keys ``size``, ``used``,
        ``free``, ``use%`` (an int) and ``mountpoint``.  ``problems`` is a
        list of human-readable messages, one per line that could not be
        read as a filesystem entry.
    """
    filesystems = {}
    problems = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        # At most six fields: the sixth keeps a mount point with spaces whole.
        fields = line.split(None, 5)
        if not fields:
            continue

        # e.g. "df: '/tank/private': Permission denied"
        if len(fields) >= 4 and fields[3] == "denied":
            problems.append(
                "Permission denied while checking disk space for: " + fields[1]
            )
            continue

        percent = fields[4].replace('%', '') if len(fields) == 6 else ""
        if percent.isdigit():
            filesystems[fields[0]] = {
                "size": fields[1],
                "used": fields[2],
                "free": fields[3],
                "use%": int(percent),
                "mountpoint": fields[5],
            }
            continue

        is_header = fields[0] == "Filesystem"
        # df reports this when no ZFS filesystem is mounted; the plugin has
        # always treated that situation as OK, so it is not a problem.
        nothing_mounted = "no file systems processed" in line
        if not (is_header or nothing_mounted):
            # Surface the line instead of crashing on it with a traceback.
            problems.append("Unexpected df output: " + line)
    return filesystems, problems


def evaluate_filesystems(filesystems, warning=ZFS_USE_WARNING,
                         critical=ZFS_USE_CRITICAL):
    """Compare every filesystem's usage against the alert thresholds.

    Args:
        filesystems: Mapping as returned by ``parse_df_output``.
        warning: Percent used at or above which a warning is raised.
        critical: Percent used at or above which a critical is raised.

    Returns:
        A ``(messages, criticals, warnings)`` tuple: the alert messages in
        filesystem-name order, and the number of critical and warning
        alerts raised.
    """
    messages = []
    criticals = 0
    warnings = 0
    # Sorted so the plugin text is stable from one run to the next.
    for name in sorted(filesystems):
        info = filesystems[name]
        percent = info["use%"]
        if percent < warning and percent < critical:
            continue
        messages.append(
            name + " is " + str(percent) + "% full! ("
            + info["used"] + "/" + info["size"] + ")"
        )
        if percent >= critical:
            criticals += 1
        else:
            warnings += 1
    return messages, criticals, warnings


def main():
    """Run df, print a single Nagios status line and return the exit code."""
    # universal_newlines=True makes communicate() return text, not bytes.
    process = Popen(DISKSPACECMD.split(), stdout=PIPE, stderr=STDOUT,
                    universal_newlines=True)
    df_text = process.communicate()[0]

    filesystems, problems = parse_df_output(df_text)
    messages, criticals, warnings = evaluate_filesystems(filesystems)
    warnings += len(problems)

    # Each entry keeps its trailing ", " so the plugin text is unchanged.
    output = "".join(entry + ", " for entry in problems + messages)

    if criticals > 0:
        print("CRITICAL: " + output)
        return CRITICAL_STATE
    if warnings > 0:
        print("WARNING: " + output)
        return WARNING_STATE

    output = "All ZFS filesystems are less than " + str(ZFS_USE_WARNING) + "% full."
    print("OK: " + output)
    return OK_STATE


if __name__ == "__main__":
    # SystemExit is raised directly: the bare exit() helper is installed by
    # the site module and is not guaranteed to exist.
    raise SystemExit(main())
-------------- next part --------------
#!/usr/bin/python

from subprocess import *

# Nagios plugin exit codes.
OK_STATE = 0
WARNING_STATE = 1
CRITICAL_STATE = 2

# Percent-allocated thresholds at which a pool raises an alert.
ZPOOL_CAP_WARNING = 80
ZPOOL_CAP_CRITICAL = 90

# The columns are requested explicitly because the default column set of
# "zpool list" differs between ZFS releases, which would shift the
# positional indexes used when parsing.
ZPOOL_COLUMNS = ("name", "size", "allocated", "free", "capacity",
                 "dedupratio", "health")
ZPOOLCMD = ["/sbin/zpool", "list", "-H", "-o", ",".join(ZPOOL_COLUMNS)]


def parse_zpool_list(text):
    """Parse tab-separated ``zpool list -H`` output into pool records.

    Args:
        text: The standard output of the zpool command, as one string.

    Returns:
        A ``(zpools, problems)`` tuple.  ``zpools`` maps each pool name to
        a dict with the keys ``size``, ``alloc``, ``free``, ``cap``,
        ``dedup`` and ``health``; ``cap`` is an int, or ``None`` when the
        capacity column is not a number (e.g. ``-``).  ``problems`` is a
        list of human-readable messages, one per line that did not have
        the expected number of columns.
    """
    zpools = {}
    problems = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        # NOTE(review): some zpool versions appear to print this notice on
        # stdout when no pool is imported -- confirm; it means "no pools".
        if not line or line == "no pools available":
            continue

        fields = line.split('\t')
        if len(fields) < len(ZPOOL_COLUMNS):
            # Surface the line instead of crashing on it with a traceback.
            problems.append("Unexpected zpool output: " + line)
            continue

        name, size, alloc, free, cap, dedup, health = fields[:7]
        cap_digits = cap.replace('%', '')
        zpools[name] = {
            "size": size,
            "alloc": alloc,
            "free": free,
            "cap": int(cap_digits) if cap_digits.isdigit() else None,
            "dedup": dedup,
            "health": health,
        }
    return zpools, problems


def evaluate_zpools(zpools, warning=ZPOOL_CAP_WARNING,
                    critical=ZPOOL_CAP_CRITICAL):
    """Check the health and the capacity of every pool.

    A DEGRADED pool raises a warning; any other health value except ONLINE
    raises a critical, so that states such as UNAVAIL are not silently
    reported as OK.  Capacity is checked independently of health and is
    skipped when no capacity figure is available.

    Args:
        zpools: Mapping as returned by ``parse_zpool_list``.
        warning: Percent allocated at or above which a warning is raised.
        critical: Percent allocated at or above which a critical is raised.

    Returns:
        A ``(messages, criticals, warnings)`` tuple: the status messages in
        pool-name order, and the number of critical and warning alerts.
    """
    messages = []
    criticals = 0
    warnings = 0
    # Sorted so the plugin text is stable from one run to the next.
    for name in sorted(zpools):
        pool = zpools[name]
        health = pool["health"]
        cap = pool["cap"]

        if health == "DEGRADED":
            messages.append(name + " is " + health + "!")
            warnings += 1
        elif health != "ONLINE":
            messages.append(name + " is " + health + "!")
            criticals += 1

        if cap is None:
            continue
        if cap >= critical:
            messages.append(name + " is over " + str(critical) + "% full!")
            criticals += 1
        elif cap >= warning:
            messages.append(name + " is over " + str(warning) + "% full!")
            warnings += 1
        elif health == "ONLINE":
            messages.append(
                name + ":ONLINE with " + pool["alloc"] + "/" + pool["size"]
                + "(" + str(cap) + "%) allocated"
            )
    return messages, criticals, warnings


def main():
    """Run zpool list, print one Nagios status line, return the exit code."""
    # universal_newlines=True makes communicate() return text, not bytes.
    process = Popen(ZPOOLCMD, stdout=PIPE, universal_newlines=True)
    zpool_text = process.communicate()[0]

    zpools, problems = parse_zpool_list(zpool_text)
    messages, criticals, warnings = evaluate_zpools(zpools)
    warnings += len(problems)

    # Each entry keeps its trailing ", " so the plugin text is unchanged.
    output = "".join(entry + ", " for entry in problems + messages)

    if criticals > 0:
        print("CRITICAL: " + output)
        return CRITICAL_STATE
    if warnings > 0:
        print("WARNING: " + output)
        return WARNING_STATE

    if not zpools:
        output = "No active zpools"

    print("OK: " + output)
    return OK_STATE


if __name__ == "__main__":
    # SystemExit is raised directly: the bare exit() helper is installed by
    # the site module and is not guaranteed to exist.
    raise SystemExit(main())


More information about the zfs-discuss mailing list