Wrapping it up. A little wsgi, a little soup, a little xlwt.

Written by  on February 25, 2013

I wanted to create an easy way for my manager to pull the monthly uptime reports from Nagios, so I wrapped the script I wrote last week into a webapp2-based report generator which handles two types of requests:

  • /reports/generator/monthly?month=[n]
  • /reports/generator/timespan?fromdate=Dow Mon dd yyyy&todate=Dow Mon dd yyyy

    The date range format was determined by the easiest-to-find-and-implement JavaScript date picker I could locate. It’s called Pikaday: http://dbushell.com/2012/10/09/pikaday-javascript-datepicker/

    Since WSGI was already required to run the graphite server on our monitoring box, I only needed to set up the webapp2 framework. It would be nice if there were a system package for it, but instead we had to install the packages from Python using pip. http://webapp-improved.appspot.com/tutorials/quickstart.nogae.html

    $ pip install WebOb
    $ pip install Paste
    $ pip install webapp2
    

    One of the things I keep struggling with is how to mix and map WSGI apps with “normal” Apache; this is what I wound up with.

            WSGIScriptAlias /reports/generator /var/www/reports/generator/nag2xls.wsgi
    
            <Directory "/var/www/reports/generator">
                    options +ExecCGI
                    AddHandler wsgi-script .wsgi
                    Order deny,allow
                    Allow from all
            </Directory>
    

    Most of the resulting app is getting the date range localized and converted to a unix timestamp for Nagios. The rest of it is more or less what I had already written to create the spreadsheet.

    import cgi
    import webapp2
    from BeautifulSoup import BeautifulSoup
    import urllib2, base64
    import xlwt
    import datetime
    import time
    
    
    class downloadMonthlyReport(webapp2.RequestHandler):
        """GET /reports/generator/monthly?month=[n]

        Builds the uptime spreadsheet for calendar month n (1-12) of the
        current year and streams it back as an .xls attachment.
        """
        def get(self):
            today=datetime.datetime.utcnow()
            # 'month' query parameter, 1-based.  No validation: a missing or
            # non-numeric value raises and surfaces via webapp2's debug page.
            theMonth=int(self.request.get('month'))
            print "theMonth=%d" % theMonth
            # Month boundaries are built as naive datetimes, then shifted by
            # time.timezone so the epoch subtraction below (against a naive
            # UTC 1970-01-01) yields local-midnight timestamps for Nagios.
            # NOTE(review): time.timezone ignores DST -- presumably fine at
            # monthly granularity; confirm.
            fromDate=datetime.datetime(today.year,theMonth,1,0,0,0)+datetime.timedelta(seconds=time.timezone) 
            if (theMonth > 11):
                # December: the end boundary wraps into January next year.
                toDate=datetime.datetime(today.year+1,1,1,0,0,0)+datetime.timedelta(seconds=time.timezone-1)
            else:
                # End of range = one second before the 1st of the next month.
                toDate=datetime.datetime(today.year,theMonth+1,1,0,0,0)+datetime.timedelta(seconds=time.timezone-1)
            #print fromDate, "->", toDate
            fromEpoch=int((fromDate - datetime.datetime(1970,1,1)).total_seconds()) 
            toEpoch=int((toDate - datetime.datetime(1970,1,1)).total_seconds())
            self.response.headers['Content-Type'] = 'application/vnd.ms-excel'
            # e.g. UptimeFeb13.xls
            self.response.headers['Content-Disposition'] = 'attachment;filename="Uptime%s.xls"' % fromDate.strftime("%b%y")
            
            # Credentials are redacted in the published listing.
            return build_sheet(self.response.out,'xxxx','xxxx',fromEpoch,toEpoch)
    
            
    class downloadTimeFrameReport(webapp2.RequestHandler):
        """Serve an .xls uptime report for an arbitrary date range.

        Expects 'fromdate' and 'todate' query parameters formatted as
        'Dow Mon dd yyyy' (Pikaday's default output), both parsed in the
        server's local timezone.
        """
        def get(self):
            date_fmt = '%a %b %d %Y'
            # One day minus a second, so the 'todate' day is inclusive.
            whole_day = 86399
            start_ts = int(time.mktime(time.strptime(self.request.get('fromdate'), date_fmt)))
            end_ts = int(time.mktime(time.strptime(self.request.get('todate'), date_fmt))) + whole_day
            # e.g. Uptime01Feb13-28Feb13.xls
            label_from = datetime.datetime.fromtimestamp(start_ts).strftime("%d%b%y")
            label_to = datetime.datetime.fromtimestamp(end_ts).strftime("%d%b%y")
            self.response.headers['Content-Type'] = 'application/vnd.ms-excel'
            self.response.headers['Content-Disposition'] = 'attachment;filename="Uptime%s-%s.xls"' % (label_from, label_to)
            # Credentials are redacted in the published listing.
            return build_sheet(self.response.out,'xxxxx','xxxxx',start_ts,end_ts)
            
            
    # WSGI route table.  The explicit monthly route is commented out because
    # the final catch-all '/.*' already falls through to downloadMonthlyReport.
    # NOTE(review): the post's text says /reports/generator/timespan but the
    # route matches 'timeframe' -- confirm which URL the date picker posts to.
    application = webapp2.WSGIApplication([('/reports/generator/timeframe.*', downloadTimeFrameReport),
                                          # ('/reports/generator/monthly.*', downloadMonthlyReport)
                                           ('/.*', downloadMonthlyReport)
                                           ],
                                  debug=True)
    
    def build_sheet(myfile,username,password,startTime,endTime):
        """Fetch the Nagios availability report for the unix-timestamp range
        [startTime, endTime] and write an .xls workbook to myfile (anything
        xlwt's Workbook.save accepts: a path or a writable stream, e.g. the
        WSGI response body).

        Sheet 'By Group' reproduces every hostgroup table, including the
        group 'Average' rows; sheet 'By Server' keeps only host name and %up.
        """
        # avail.cgi query: t1/t2 are epoch seconds; hostgroup=all pulls every
        # group in a single page.
        reportRequest='http://idcctrl1/cgi-bin/nagios3/avail.cgi?t1=%d&t2=%d&show_log_entries=&hostgroup=all' % ( startTime , endTime)
        print reportRequest
        request = urllib2.Request(reportRequest)
        # The CGI sits behind basic auth; build the Authorization header by
        # hand (encodestring appends a newline, hence the replace).
        base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '')
        request.add_header("Authorization", "Basic %s" % base64string)   
        result = urllib2.urlopen(request).read()
        soup = BeautifulSoup(''.join(result))
        
        workbook = xlwt.Workbook()
        sheet1 = workbook.add_sheet('By Group')
        sheet2 = workbook.add_sheet('By Server')
        
        row_count = 0     # next free row on sheet1
        s2rc=0            # next free row on sheet2
        maxCol1Width=0    # widest column-0 text so far, in 1/256-char units
        
        littleFont = xlwt.Font() # Create the Font
        littleFont.name = 'Times New Roman'
        littleFont.height = 0x00A0 # ~8pt
        littleFont.italic = True
        # sic: 'lable' -- misspelled local name kept in this doc-only pass
        lableStyle=xlwt.XFStyle()
        lableStyle.font = littleFont
        
        emFont = xlwt.Font() # Create the Font
        emFont.name = 'Times New Roman'
        #emFont.height = 0x00A0 # ~8pt
        emFont.italic = True
        emFont.bold = True
        headerStyle=xlwt.XFStyle()
        headerStyle.font = emFont
        
        percentStyle = xlwt.XFStyle()
        percentStyle.num_format_str = '##0.00%' 
        
        # Title row, e.g. 'Uptime Report : 01/Feb/13 to 28/Feb/13'.
        sheet1.write(row_count,0,
                     "Uptime Report : %s to %s" % (
                       datetime.datetime.fromtimestamp(startTime).strftime("%d/%b/%y"),
                       datetime.datetime.fromtimestamp(endTime).strftime("%d/%b/%y")
                       ),
                      headerStyle)
        row_count+=1
                       
        sheet1.write(row_count,0,'Host Name',lableStyle) 
        sheet1.write(row_count,1,'Up',lableStyle) 
        sheet1.write(row_count,2,'Down',lableStyle) 
        sheet1.write(row_count,3,'Unreachable',lableStyle) 
        sheet1.write(row_count,4,'Unknown',lableStyle)
        row_count+=1
         
        # One 'dataTitle' div per hostgroup; the matching numbers live in the
        # next <table class="data"> that follows it.
        for header in soup.findAll("div", { "class" : "dataTitle" }):
            title = header.find(text=True)
            if "All Hostgroups" in title: 
                # Skip the grand-total section; each group is listed anyway.
                continue
            #print "\n",title
            row_count+=1
            sheet1.write(row_count,0,title,headerStyle)
            
            table = header.findNext('table', { "class" : "data" })
            try:  
                rows = table.findAll('tr')
                for tr in rows:
                    cols = tr.findAll('td')
                    column=0
                    skipThisRow=False
                    for td in cols:
                        # First word of the cell text, e.g. '99.98%' out of
                        # '99.98% (99.99%)'.
                        text = ''.join(td.find(text=True)).split(' ',1)[0]
                        if column==0:
                            # Track the widest host name and keep column 0 of
                            # both sheets wide enough to display it.
                            cw=len(text)*256
                            if cw>maxCol1Width:
                                maxCol1Width=cw
                                sheet1.col(0).width = maxCol1Width
                                sheet2.col(0).width = maxCol1Width
                            if 'Average' in text:
                                sheet1.write(row_count,column,text.strip('%'),headerStyle)
                            else:
                                sheet1.write(row_count,column,text.strip('%'))
                        else:
                            # Percentage cells become real numbers in [0,1],
                            # rendered back to percent by percentStyle.
                            sheet1.write(row_count,column,float(text.strip('%'))/100.00,percentStyle)
                        if column==0:
                            if 'Average' in text:
                                # Group-average rows stay off the By Server sheet.
                                skipThisRow=True
                        if not skipThisRow and column<2:
                            # Sheet2 only gets host name (col 0) and %up (col 1).
                            if column==0:
                                sheet2.write(s2rc,column,text.strip('%'))
                            else: 
                                sheet2.write(s2rc,column,float(text.strip('%'))/100.00,percentStyle)
                        column+=1
                        #print text+"|",
                    if (skipThisRow is False and column>0):
                        s2rc+=1
                    #print
                    row_count+=1
            except Exception as e:
                # Some sections carry no data table; log and move on.
                print str(e)
        
        workbook.save(myfile)
    
    

Pulling uptime reports (straight to excel) from Nagios.

Written by  on February 18, 2013

The other day I was looking at automating the clean-up of the CA uptime reports into documents that could be turned into graphs using PowerShell. But really the end goal here is to get rid of the CA suite, since no one employed here knows how to use it, it takes 5 systems, and it costs an exorbitant amount of money each year.

Since we had had a few outages in the past few weeks I thought I would look at the uptime reports available from Nagios, they are as good as the CA in terms of usable content and based on the outages we had experienced they seemed pretty accurate.

What was missing was a way to get them into a CSV or Excel format. Most of the searching I did led me to a lot of people who hacked the source code to produce CSV instead of the HTML pictured above. I found an easier way using a couple of Python libraries that I have been checking out.

BeautifulSoup — a parsing library for xml/html
xlwt — a library for creating excel files directly

The following code creates a two sheet spreadsheet one broken out by group and one with just the by server uptimes.

#!/usr/bin/python
#TODO make a webapp2 wrapper for this.
#
from BeautifulSoup import BeautifulSoup
import urllib2, base64
import xlwt
username='xxxxx'
password='xxxxx'
spreadsheet='../Desktop/xfer/uptime.xls'

startTime=1358556160
endTime=1361234560

reportRequest='http://idcctrl1/cgi-bin/nagios3/avail.cgi?t1=%d&t2=%d&show_log_entries=&hostgroup=all' % ( startTime , endTime )
request = urllib2.Request(reportRequest)
base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '')
request.add_header("Authorization", "Basic %s" % base64string)
result = urllib2.urlopen(request).read()
soup = BeautifulSoup(''.join(result))

workbook = xlwt.Workbook()
sheet1 = workbook.add_sheet('By Group')
sheet2 = workbook.add_sheet('By Server')

row_count = 0
s2rc=0
maxCol1Width=0

littleFont = xlwt.Font() # Create the Font
littleFont.name = 'Times New Roman'
littleFont.height = 0x00A0 # ~8pt
littleFont.italic = True
lableStyle=xlwt.XFStyle()
lableStyle.font = littleFont

emFont = xlwt.Font() # Create the Font
emFont.name = 'Times New Roman'
#emFont.height = 0x00A0 # ~8pt
emFont.italic = True
emFont.bold = True
headerStyle=xlwt.XFStyle()
headerStyle.font = emFont

percentStyle = xlwt.XFStyle()
percentStyle.num_format_str = '##0.00%' 

sheet1.write(row_count,0,'Host Name',lableStyle)
sheet1.write(row_count,1,'Up',lableStyle)
sheet1.write(row_count,2,'Down',lableStyle)
sheet1.write(row_count,3,'Unreachable',lableStyle)
sheet1.write(row_count,4,'Unknown',lableStyle)
row_count+=1

for header in soup.findAll("div", { "class" : "dataTitle" }):
    title = header.find(text=True)
    if "All Hostgroups" in title:
        continue
    print "\n",title
    row_count+=1
    sheet1.write(row_count,0,title,headerStyle)

    table = header.findNext('table', { "class" : "data" })
    try:
        rows = table.findAll('tr')
        for tr in rows:
            cols = tr.findAll('td')
            column=0
            skipThisRow=False
            for td in cols:
                text = ''.join(td.find(text=True)).split(' ',1)[0]
                if column==0:
                    cw=len(text)*256
                    if cw>maxCol1Width:
                        maxCol1Width=cw
                        sheet1.col(0).width = maxCol1Width
                        sheet2.col(0).width = maxCol1Width
                    if 'Average' in text:
                        sheet1.write(row_count,column,text.strip('%'),headerStyle)
                    else:
                        sheet1.write(row_count,column,text.strip('%'))
                else:
                    sheet1.write(row_count,column,float(text.strip('%'))/100.00,percentStyle)
                if column==0:
                    if 'Average' in text:
                        skipThisRow=True
                if not skipThisRow and column0):
                s2rc+=1
            print
            row_count+=1
    except Exception as e:
        print str(e)

workbook.save(spreadsheet)

 

Visualizing thrash.

Written by  on February 12, 2013

For the past few months there have been consultants helping our DBAs get the Oracle financial servers to Oracle 12. They have been bringing the servers (a few of which are under-memoried) to their knees. So they asked us to monitor memory, processor and swap during their upgrades. We have the very expensive and labour-intensive CA suite of monitoring tools, but when I went to look at the performance graphs they showed a swap usage of 11380.87% utilization, and blank graphs.

After a bit of research (pertinent links below) I found a this thing called graphite. Graphite has 3 components: a daemon to capture datapoints (carbon), a way to store them effectively (whisper) and a presentation layer (graphite).

I mostly followed this blog http://coreygoldberg.blogspot.com/2012/04/installing-graphite-099-on-ubuntu-1204.html which worked on my fresh 12.04 desktop. It didn’t work so well on the monitoring system or on my 10.4-based server, but when I got stuck I just installed from source: http://graphite.readthedocs.org/en/0.9.10/install-source.html Oh, and 12.04 does not have the ceres library, which for some reason doesn’t show up in the pre-reqs test — get it at https://github.com/graphite-project/ceres .

 


I was quickly able to put up the information that we wanted to monitor but when I tried to work on one of the solaris zones I started seeing negative memory usage.

load averages:  1.60,  1.63,  1.54;               up 94+13:00:11       17:22:10
285 processes: 283 sleeping, 2 on cpu
CPU states: 97.0% idle,  1.5% user,  1.5% kernel,  0.0% iowait,  0.0% swap
Kernel: 3439 ctxsw, 121 trap, 3248 intr, 3998 syscall, 96 flt, 4296 pgin
Memory: 35G phys mem, 78G free mem, 35G total swap, 29G free swap

The zone has 35G of memory allocated to it. But top sees the memory for the entire system. (this is probably the same mistake that CA was making) So on machines which are not global zones we use vmstat to get the free memory.

#!/usr/bin/env python

import platform
import socket
import time
import subprocess

CARBON_SERVER = '192.168.116.7'  # carbon's plaintext line receiver
CARBON_PORT = 2003               # carbon's default line-protocol port
DELAY = 15  # secs

# Module-level defaults; rebound from get_memstats() on every pass of the
# __main__ loop, so these seeds only matter before the first sample.
totalmem=1
meminuse=1
totalswap=1
swapinuse=1

def MGKtoZeros(numstr):
    """Convert a top(1)-style size string ('35G', '512M', '64K') to a
    float number of bytes.

    Generalized: a bare numeric string (e.g. a vmstat column, already in
    plain units) is now parsed and returned as a float instead of silently
    falling through and returning None, which poisoned later arithmetic.
    """
    K = 1024
    M = K * 1024
    G = M * 1024
    # endswith() is sturdier than the old rfind(...)>0 test: it also
    # matches a suffix at position 0 and never fires on a mid-string letter.
    if numstr.endswith('G'):
        return float(numstr[:-1]) * G
    if numstr.endswith('M'):
        return float(numstr[:-1]) * M
    if numstr.endswith('K'):
        return float(numstr[:-1]) * K
    return float(numstr)

def get_memstats():
    """Sample memory, swap and CPU usage via top and vmstat.

    Returns a 5-tuple (total_mem, mem_in_use, total_swap, swap_in_use,
    cpu_used_pct) where the in-use figures are total minus the 'free'
    numbers top reports, in bytes as decoded by MGKtoZeros.
    """
    # One batch-mode top run per line of interest.  Parsing is positional:
    # assumes Solaris top's 'Memory: 35G phys mem, 78G free mem, 35G total
    # swap, 29G free swap' layout -- TODO confirm on other top builds.
    p=subprocess.Popen("/opt/it/bin/top -b|grep -i memory",
                       shell=True, stdout=subprocess.PIPE)
    o=p.communicate()[0]
    p=subprocess.Popen("/opt/it/bin/top -b|grep -i states",
                       shell=True, stdout=subprocess.PIPE)
    c=p.communicate()[0]
    # vmstat fallback for non-global zones (see blog text); the parsed
    # values are currently commented out below.
    p=subprocess.Popen("vmstat|tail -1",
                       shell=True, stdout=subprocess.PIPE)
    v=p.communicate()[0]
    tm=MGKtoZeros(o.split()[1])    # total physical memory
    #mu=float(v.split()[4])
    mu=MGKtoZeros(o.split()[4])    # free memory (top's 'free mem')
    ts=MGKtoZeros(o.split()[7])    # total swap
    su=MGKtoZeros(o.split()[10])   # free swap
    cpu=100.00-float(c.split()[2].strip('%'))  # 100 - %idle from 'CPU states:'
    #su=float(v.split()[3])
    return (tm,tm-mu,ts,ts-su,cpu)

def send_msg(message):
    """Push one batch of newline-terminated carbon lines to graphite.

    Opens a fresh TCP connection per call.  The socket is now closed in a
    finally block, so a down or flaky carbon server can no longer leak a
    file descriptor on every pass of the polling loop (the exception still
    propagates; the __main__ loop catches and logs it).
    """
    print('sending message:\n%s' % message)
    sock = socket.socket()
    try:
        sock.connect((CARBON_SERVER, CARBON_PORT))
        sock.sendall(message)
    finally:
        sock.close()

if __name__ == '__main__':
    # Flatten dots out of the hostname so it doesn't create extra levels
    # in graphite's metric tree.
    node = platform.node().replace('.', '-')
    while True:
      try:
        timestamp = int(time.time())
        (totalmem,meminuse,totalswap,swapinuse,cpupct)=get_memstats()
        print totalmem,meminuse,totalswap,swapinuse
        # carbon plaintext protocol: '<metric path> <value> <epoch seconds>'
        lines = [
            'system.%s.pct_mem_used %s %d' % (node, (meminuse/totalmem)*100, timestamp),
            'system.%s.pct_swap_used %s %d' % (node,(swapinuse/totalswap)*100, timestamp),
            'system.%s.pct_cpu_used %s %d' % (node,cpupct, timestamp),
        ]
        message = '\n'.join(lines) + '\n'
        send_msg(message)
      except Exception as e:
        # Best-effort sampler: log and keep polling through transient
        # top/carbon failures.
        print str(e)
      time.sleep(DELAY)