Tuesday, September 24, 2013

Restart a service checking a pattern in the event log

This script restarts a service when a given pattern appears in the Windows event log and sends an e-mail notification to xxxx@xxxx.com.

                                                                     
                                                                     
                                                                     
                                             

' ----- ExeScript Options Begin -----
' ScriptType: window,activescript,administrator
' DestDirectory: current
' CommandLine: -c 112 -p "3 / 10" -l Application -e "1 / 10"
' Icon: default
' 32Bit: yes
' Author: Babu Dhinakaran S
' ----- ExeScript Options End -----
'On Error Resume Next
' Working variables for the event-log check.
Dim subStr
Dim Return1, Return2
Const CONVERT_TO_LOCAL_TIME = True
Dim strServiceName
Dim eCode
Dim patternMatch
Dim logFile
Dim aeCode, apatternMatch, alogFile
Dim aerrorPattern, errorPattern
Dim dtmStartDate
Dim Flag

' Expected command line (8 arguments):
'   -c <EventCode> -p <pattern that triggers the restart> -l <logfile> -e <pattern that raises the alarm>
' Reading Arguments.Item(n) past the supplied count raises a runtime error,
' so check the count first and report UNKNOWN (3) instead of crashing.
If Wscript.Arguments.Count < 8 Then
 Wscript.Echo "Follow the Syntax: example: eventlog -c EventCode -p Pattern_to_Match_in_the_message_to_service_restart -l logfile_Application_System -e errorPattern_to_Match _in_then_message_to_trigger_an_alarm"
 Wscript.Quit(3)
End If

' The numeric conversion below raises a type mismatch on non-numeric text.
If Not IsNumeric(Wscript.Arguments.Item(1)) Then
 Wscript.Echo "Event code must be numeric: " & Wscript.Arguments.Item(1)
 Wscript.Quit(3)
End If

aeCode = Wscript.Arguments.Item(0)
' CLng instead of CInt: event IDs can exceed 32767, which overflows CInt.
eCode = CLng(Wscript.Arguments.Item(1))
apatternMatch = Wscript.Arguments.Item(2)
patternMatch = Wscript.Arguments.Item(3)
alogFile = Wscript.Arguments.Item(4)
logFile = Wscript.Arguments.Item(5)
aerrorPattern = Wscript.Arguments.Item(6)
errorPattern = Wscript.Arguments.Item(7)

'Wscript.Echo "aeCode"&aeCode
'Wscript.Echo "eCode"&eCode
'Wscript.Echo "apatternMatch"&apatternMatch
'Wscript.Echo "alogFile"&alogFile
'Wscript.Echo "logFile"&logFile
'Wscript.Echo "aerrorPattern"&aerrorPattern
'Wscript.Echo "errorPattern"&errorPattern

' Main check: look for eCode events in logFile that are newer than the stored
' timestamp and act on them depending on the stored flag.
'
' State kept between runs in the NSClient++ scripts folder:
'   fileName  - TimeWritten of the last event that changed the state
'   fileName2 - flag: 0 = waiting for errorPattern,
'                     1 = errorPattern was seen, waiting for patternMatch
'
' flag = 0: an event matching errorPattern sets the flag to 1 and exits CRITICAL (2);
'           otherwise the check exits OK (0).
' flag = 1: an event matching patternMatch resets the flag to 0 and restarts the
'           service named after the event source; until then the check stays CRITICAL (2).
' Exit code 3 (UNKNOWN) is used for WMI failures and wrong usage.
If aeCode = "-c" And apatternMatch = "-p" And alogFile = "-l" And aerrorPattern = "-e" Then
 Set objFSO = CreateObject("Scripting.FileSystemObject")
 fileName = "C:\Program Files\NSClient++\scripts\" & logFile & "_ " & eCode & ".txt"
 fileName2 = "C:\Program Files\NSClient++\scripts\" & logFile & "_ " & eCode & "_flag" & ".txt"

 ' Timestamp of the last handled event; on the first run start from "now".
 If objFSO.FileExists(fileName) Then
  dtmStartDate = ReadLastLine(objFSO, fileName)
 Else
  Set objNow = CreateObject("WbemScripting.SWbemDateTime")
  objNow.SetVarDate CDate(Now)
  dtmStartDate = objNow.Value
  WriteStateFile objFSO, fileName, dtmStartDate
 End If

 ' Alarm flag; a missing or empty flag file means "no error seen yet".
 flag = 0
 If objFSO.FileExists(fileName2) Then
  strFlag = ReadLastLine(objFSO, fileName2)
  If IsNumeric(strFlag) Then flag = CInt(strFlag)
 Else
  WriteStateFile objFSO, fileName2, "0"
 End If

 ' Error trapping must be switched on for the Err check to see anything;
 ' without it a WMI failure aborts the script with a runtime error.
 strComputer = "."
 On Error Resume Next
 Set objWMIService = GetObject("winmgmts:" _
  & "{impersonationLevel=impersonate,(Security)}!\\" _
  & strComputer & "\root\cimv2")
 Set colEvents = objWMIService.ExecQuery("Select * from Win32_NTLogEvent Where Logfile = '" & logFile & "' and " & " TimeWritten > ' " & dtmStartDate & " ' and EventCode=" & eCode & "")
 wmiError = Err.Number   ' capture before On Error GoTo 0, which resets Err
 Err.Clear
 On Error GoTo 0

 If wmiError <> 0 Then
  Wscript.Echo "Unable to Access the remote Server"
  Wscript.Quit(3)
 End If

 If flag = 0 Then
  Set myRegExp = BuildMatcher(errorPattern)
  For Each objEvent In colEvents
   ' "" & ... turns a Null message into an empty string before matching.
   If myRegExp.Test("" & objEvent.Message) Then
    WriteStateFile objFSO, fileName, objEvent.TimeWritten
    WriteStateFile objFSO, fileName2, "1"
    Wscript.Echo "System has entered in CRITICAL State of /10"
    Wscript.Quit(2)
   End If
  Next
  Wscript.Echo "System has not entered in CRITICAL State of /10"
  Wscript.Quit(0)

 ElseIf flag = 1 Then
  Set myRegExp = BuildMatcher(patternMatch)
  For Each objEvent In colEvents
   If myRegExp.Test("" & objEvent.Message) Then
    WriteStateFile objFSO, fileName, objEvent.TimeWritten
    WriteStateFile objFSO, fileName2, "0"
    ' The event source name is used as the Windows service name.
    ' RestartService quits the script unless no such service exists,
    ' in which case the loop simply moves on to the next event.
    strServiceName = objEvent.SourceName
    RestartService strServiceName
   End If
  Next
  Wscript.Echo "System has entered in CRITICAL State of /10"
  Wscript.Quit(2)

 Else
  Wscript.Echo "Unexpected flag value " & flag & " in " & fileName2
  Wscript.Quit(3)
 End If

Else
 Wscript.Echo "Follow the Syntax: example: eventlog -c EventCode -p Pattern_to_Match_in_the_message_to_service_restart -l logfile_Application_System -e errorPattern_to_Match _in_then_message_to_trigger_an_alarm"
 ' Wrong usage is reported as UNKNOWN rather than the implicit exit code 0 (OK).
 Wscript.Quit(3)
End If

' Returns the last line of a text file, or "" when the file is empty.
' The stream is closed before returning so the file can be rewritten later.
Function ReadLastLine(objFileSystem, strPath)
 Dim objStream, strLine
 strLine = ""
 Set objStream = objFileSystem.OpenTextFile(strPath, 1)
 Do Until objStream.AtEndOfStream
  strLine = objStream.ReadLine
 Loop
 objStream.Close
 ReadLastLine = strLine
End Function

' Replaces the content of a state file with a single line and closes it.
Sub WriteStateFile(objFileSystem, strPath, strText)
 Dim objStream
 Set objStream = objFileSystem.CreateTextFile(strPath, True)
 objStream.WriteLine strText
 objStream.Close
End Sub

' Builds a case-insensitive, global regular expression for the given pattern.
Function BuildMatcher(strPattern)
 Dim objRegExp
 Set objRegExp = New RegExp
 objRegExp.IgnoreCase = True
 objRegExp.Global = True
 objRegExp.Pattern = strPattern
 Set BuildMatcher = objRegExp
End Function

' Sends a notification mail through the SMTP relay.
Sub SendMail(strSubject, strBody)
 Dim objMessage
 Set objMessage = CreateObject("CDO.Message")
 objMessage.From = "no-reply@cmcmarkets.com"
 objMessage.To = "b.dhinakaran@cmcmarkets.com"
 objMessage.Cc = "t.welsh@cmcmarkets.com"
 objMessage.Configuration.Fields.Item("http://schemas.microsoft.com/cdo/configuration/sendusing") = 2
 objMessage.Configuration.Fields.Item("http://schemas.microsoft.com/cdo/configuration/smtpserver") = "smtp.cmc.local"
 objMessage.Configuration.Fields.Item("http://schemas.microsoft.com/cdo/configuration/smtpserverport") = 25
 objMessage.Configuration.Fields.Update
 objMessage.Subject = strSubject
 objMessage.TextBody = strBody
 objMessage.Send
End Sub

' Stops and then starts the named service, mails the outcome and quits:
'   3 = service lookup failed, 2 = stop or start failed, 0 = restart succeeded.
' Returns without quitting only when no service with that name exists.
Sub RestartService(strName)
 Dim objWMI, colListOfServices, objService, lookupError, stopResult, startResult
 Const FAILURE_BODY = "Please check to why this was not restarted automatically or manually restart the Service"

 On Error Resume Next
 Set objWMI = GetObject("winmgmts:{impersonationLevel=impersonate}!\\.\root\cimv2")
 Set colListOfServices = objWMI.ExecQuery("Select * from Win32_Service Where Name ='" & strName & "'")
 lookupError = Err.Number
 Err.Clear
 On Error GoTo 0

 If lookupError <> 0 Then
  SendMail "Unable to invoke object to restart the service:" & strName, FAILURE_BODY
  Wscript.Quit(3)
 End If

 For Each objService In colListOfServices
  stopResult = objService.StopService()
  If stopResult <> 0 Then
   SendMail "Service didn't respond for Stop Signal:" & strName, FAILURE_BODY
   Wscript.Quit(2)
  End If
 Next

 ' Give the service 20 seconds to stop before starting it again.
 Wscript.Sleep 20000

 For Each objService In colListOfServices
  startResult = objService.StartService()
  If startResult <> 0 Then
   Wscript.Echo "Failed to Start the Service and the Error code = " & startResult
   SendMail "Service didn't respond for Start Signal after the Stop:" & strName, FAILURE_BODY
   Wscript.Quit(2)
  Else
   Wscript.Echo "Successful Restart of Service:" & strName
   SendMail "Successful Restart of Service:" & strName, "Successful Restart of Service"
   Wscript.Quit(0)
  End If
 Next
End Sub

Tuesday, July 30, 2013

Microsoft Exchange Queue monitoring Plugin for Nagios

# Test Queue Health
# Developed by Babu Dhinakaran S
# To execute from within NSClient++
#
#NSClient 0.4.1.90
#[/settings/external scripts/scripts]
#ps2 = cmd /c echo scripts\\test.ps1 | PowerShell.exe -command -

# On the check_nrpe command include the -t 30, since it takes some time to load the Exchange cmdlet's.

#Add-PSSnapin Microsoft.Exchange.Management.PowerShell.E2010

#Add-PSSnapin -Name Microsoft.Exchange.Management.PowerShell.E2010



# Nagios check for Exchange transport queues.
#
# Every queue returned by Get-Queue is put into one of three families and its
# message count is compared with that family's thresholds:
#   Submission - the first queue whose identity contains "Submission"
#   Shadow     - any queue whose identity contains "Shadow"
#   Other      - everything else
# A count strictly greater than Warning/Critical raises that state. The worst
# state found is reported with exit code 2 (CRITICAL), 1 (WARNING) or 0 (OK);
# a failure to read the queues is reported as 3 (UNKNOWN) instead of OK.
$ExchangeServer = "test.test.COM"

$thresholds = @{
    "Submission" = @{ Warning = 6; Critical = 10 }
    "Shadow"     = @{ Warning = 2; Critical = 3 }
    "Other"      = @{ Warning = 5; Critical = 10 }
}

# Findings are collected per family so the message keeps the order
# Submission, Shadow, Other regardless of the order Get-Queue returns.
$findings = @{
    "Submission" = New-Object System.Collections.ArrayList
    "Shadow"     = New-Object System.Collections.ArrayList
    "Other"      = New-Object System.Collections.ArrayList
}

try
{
    $queues = @(Get-Queue -Server $ExchangeServer -ErrorAction Stop)
}
catch
{
    Write-Host "UNKNOWN: unable to read queues from $ExchangeServer -" $_.Exception.Message
    exit 3
}

$NagiosStatus = 0
$submissionSeen = $false

ForEach ($Queue in $queues)
{
    $identity = "$($Queue.Identity)"
    $count = [int]$Queue.MessageCount

    # Classify the queue by name instead of removing entries from index-based
    # lists, which shifted positions and could drop the wrong queue.
    if ((-not $submissionSeen) -and $identity.Contains("Submission"))
    {
        $family = "Submission"
        $submissionSeen = $true
    }
    elseif ($identity.Contains("Shadow"))
    {
        $family = "Shadow"
    }
    else
    {
        $family = "Other"
    }

    $limit = $thresholds[$family]
    if ($count -gt $limit.Critical)
    {
        $state = 2
    }
    elseif ($count -gt $limit.Warning)
    {
        $state = 1
    }
    else
    {
        continue
    }

    [void]$findings[$family].Add("$identity queue has $count messages")

    # Never lower the status once a more severe state was recorded.
    if ($state -gt $NagiosStatus)
    {
        $NagiosStatus = $state
    }
}

$NagiosDescription = (@($findings["Submission"]) + @($findings["Shadow"]) + @($findings["Other"])) -join ", "

# Output, what level should we tell our caller?
if ($NagiosStatus -eq 2) {
    Write-Host "CRITICAL: " $NagiosDescription
    exit 2
} elseif ($NagiosStatus -eq 1) {
    Write-Host "WARNING: " $NagiosDescription
    exit 1
} else {
    Write-Host "OK: All mail queues within limits."
    exit 0
}

#Write-Host $key and $value
#Write-Host "After all Operation:"
#Write-Host $key

#Write-Host $value

Friday, May 24, 2013

Band-width monitoring for CISCO 5K and 7K switches

Run the Perl script below and redirect its output to /usr/local/nagios/libexec/band-width-output/host-ipaddress (a file named after the device's IP address).
----------------------------------------------------------------------------------------------------------

#!/usr/bin/perl -w
#Purpose         : To pull the interface band-width information from the 5K and 7K switches
#Authors         : Babu Dhinakaran S
#Date              : 24th May 2011
#Version         : V1.0
#
# Logs in to the switch over SSH, runs "show interface" and prints the output
# to STDOUT so that the caller can redirect it to a file.

    use strict;
    use Net::SSH::Perl;

    # Connection settings - replace the placeholders before use.
    my $host = "deviceip address";
    my $user = "username";
    my $pass = "password";
    my $cmd  = "show interface";

    my $ssh = Net::SSH::Perl->new($host);
    $ssh->login($user, $pass);
    my ($stdout, $stderr, $exit) = $ssh->cmd($cmd);

    # $stdout is undefined when the command produced no output; printing it
    # unguarded triggers an "uninitialized value" warning under -w.
    print $stdout if defined $stdout;
    print STDERR $stderr if defined $stderr && length $stderr;

    # Let the caller see a failed remote command instead of always exiting 0.
    exit( $exit ? 1 : 0 );

----------------------------------------------------------------------------------------------------------
Run the script below to extract the interface bandwidth information from the output file (/usr/local/nagios/libexec/band-width-output/host-ipaddress).
----------------------------------------------------------------------------------------------------------
#!/bin/bash
#Purpose         : To extract the bandwidth value from the file
#Authors         : Ranjith Kumar R
#Date            : 21st May 2011
#Version         : V1.0
#
# Nagios plugin: reads the saved "show interface" output of a switch from
# $WORKINGDIR/<hostaddress>, extracts the input/output rate (bits/sec) of one
# interface and compares both with warning/critical thresholds given as a
# percentage of the link speed (-u, in gigabits per second).
# Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.

PROGNAME=$(/bin/basename "$0")
PROGPATH=$(echo "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,')
REVISION="V1.0"
ECHO="/bin/echo"
STATE_UNKNOWN=3
STATE_OK=0
STATE_CRITICAL=2
STATE_WARNING=1
TAIL="/usr/bin/tail"
WORKINGDIR="/usr/local/nagios/libexec/band-width-output"
# Upper bound for waiting on the collector (25 tries x 2 s), so a stuck
# collector cannot hang the check forever.
MAX_WAIT_TRIES=25

print_usage() {
    echo "Usage: $PROGNAME -H Hostaddress -i interfacename -u unit in Gigabytes -w warning percentage -c critical percentage"
    echo "Usage: $PROGNAME --help"
    echo "Usage: $PROGNAME --version"
}

# Defined locally: the script does not source Nagios' utils.sh.
print_revision() {
    echo "$1 $2"
}

print_help() {
    print_revision "$PROGNAME" "$REVISION"
    echo ""
    print_usage
    echo ""
    echo "Bandwidth monitor plugin for Nagios"
    echo ""
}

# True when $1 is a non-negative decimal number (bc accepts fractions).
is_number() {
    [[ $1 =~ ^[0-9]+([.][0-9]+)?$ ]]
}

# Grab the command line arguments
exitstatus=$STATE_WARNING #default
while test -n "$1"; do
    case "$1" in
        --help|-h)
            print_help
            exit $STATE_OK
            ;;
        --version|-V)
            print_revision "$PROGNAME" "$REVISION"
            exit $STATE_OK
            ;;
        --hostaddress|-H)
            HOSTADDRESS=$2
            shift
            ;;
        --interface|-i)
            interface=$2
            shift
            ;;
        --unit|-u)
            unit=$2
            shift
            ;;
        --warning|-w)
            warning=$2
            shift
            ;;
        --critical|-c)
            critical=$2
            shift
            ;;
        --exitstatus|-x)
            exitstatus=$2
            shift
            ;;
        *)
            echo "Unknown argument: $1"
            print_usage
            exit $STATE_UNKNOWN
            ;;
    esac
    shift
done

# All five options are required. This is checked after parsing so that
# --help and --version work on their own.
if [ -z "$HOSTADDRESS" ] || [ -z "$interface" ] || [ -z "$unit" ] || [ -z "$warning" ] || [ -z "$critical" ]; then
    print_usage
    exit $STATE_UNKNOWN
fi

if ! is_number "$unit" || ! is_number "$warning" || ! is_number "$critical"; then
    $ECHO "UNKNOWN : unit, warning and critical must be numeric"
    exit $STATE_UNKNOWN
fi

# Wait while some process still has the host's output file open.
tries=0
while lsof 2>/dev/null | grep -q -- "$HOSTADDRESS"; do
    tries=$((tries + 1))
    if [ "$tries" -ge "$MAX_WAIT_TRIES" ]; then
        $ECHO "UNKNOWN : $WORKINGDIR/$HOSTADDRESS is still being written"
        exit $STATE_UNKNOWN
    fi
    sleep 2
done

# Convert the percentage thresholds into bits/sec of the configured link speed.
unitbits=1000000000

bits=$( echo "$unitbits * $unit" | bc )
if [ "$( echo "$bits > 0" | bc )" -ne 1 ]; then
    $ECHO "UNKNOWN : unit must be greater than zero"
    exit $STATE_UNKNOWN
fi
warning=$( echo "$bits * $warning" | bc )
warning=$( echo "$warning / 100" | bc )
critical=$( echo "$bits * $critical" | bc )
critical=$( echo "$critical / 100" | bc )

# Refuse to evaluate a stale or incomplete output file.
FILEAGE=$(/usr/local/nagios/libexec/check_file_age -w 600 -c 900 -f "$WORKINGDIR/$HOSTADDRESS")
result=$?

if [ "$result" -ne 0 ]; then
    $ECHO "UNKNOWN : Please contact Ranjith or Babu. $FILEAGE"
    /bin/echo "Please contact Ranjith or Babu" | /bin/mail -s "Incomplete file write call Ranjith or Babu" ranjith.rajendran@test.com babu.dhinakaran@test.com -- -f no_reply_nagios@test.com > /dev/null 2>&1
    exit $STATE_UNKNOWN
fi

inputvalue=$(grep -A 22 "$interface is" "$WORKINGDIR/$HOSTADDRESS" | egrep "input rate [0-9]* bits/sec" | awk '{print $5}')
outputvalue=$(grep -A 22 "$interface is" "$WORKINGDIR/$HOSTADDRESS" | egrep "output rate [0-9]* bits/sec" | awk '{print $5}')

if [ "$inputvalue" == "" ] || [ "$outputvalue" == "" ]; then
    $ECHO "UNKNOWN : input and output values are null"
    exit $STATE_UNKNOWN
fi

# Validate before doing any arithmetic with the extracted values.
if ! [[ $inputvalue =~ ^[0-9]+$ ]] || ! [[ $outputvalue =~ ^[0-9]+$ ]]; then
    $ECHO "CRITICAL : input rate and output rate values are not integer"
    exit $STATE_CRITICAL
fi

if [ "$warning" -gt "$critical" ]; then
    $ECHO "CRITICAL : Warning threshold should be less than critical"
    exit $STATE_CRITICAL
fi

inputusedper=$( echo "$inputvalue / $bits" | bc -l )
inputusedper=$( echo "$inputusedper * 100" | bc -l )
outputusedper=$( echo "$outputvalue / $bits" | bc -l )
outputusedper=$( echo "$outputusedper * 100" | bc -l )

# Text and performance data shared by every result line.
details="$interface interface input rate used percentage=${inputusedper}% and output rate used percentage=${outputusedper}%|INPUT_RATE_USED%=${inputusedper}%,OUTPUT_RATE_USED%=${outputusedper}%,input-rate=${inputvalue}bits;$warning;$critical,output-rate=${outputvalue}bits;$warning;$critical"

if [ "$inputvalue" -gt "$critical" ] || [ "$outputvalue" -gt "$critical" ]; then
    $ECHO "CRITICAL : High Band-width utilization on $details"
    exit $STATE_CRITICAL
elif [ "$inputvalue" -gt "$warning" ] || [ "$outputvalue" -gt "$warning" ]; then
    $ECHO "WARNING : High Band-width utilization on $details"
    exit $STATE_WARNING
else
    # Neither rate exceeds the warning threshold, including the case where
    # both are exactly equal to it.
    $ECHO "OK: Band-width utilization on $details"
    exit $STATE_OK
fi



Monday, May 20, 2013

How to Retrieve command output of a remote machine using expect script


#!/usr/bin/expect
#Purpose : Retrieve command output of a remote machine using expect script
#Author : Ranjith Kumar R
#Date : 20th May 2013
#
# Logs in over telnet, runs one command and writes what the remote side
# printed to log.txt. Replace the placeholders (server address, username,
# password, command name) before use.

set timeout 60
set log [open "log.txt" "w"]
spawn telnet IP address of a server
expect "login:"
send "username\r"
expect "Password:"
send "password\r"
expect "#"
send "command name\r"
# Wait for the prompt to return: expect_out(buffer) is only filled by an
# expect command, so without this the command output is never captured.
# NOTE(review): output that itself contains "#" ends the capture early.
expect "#"
set outcome $expect_out(buffer)
puts $log $outcome
close $log
send "exit\r"
expect eof

Monday, May 6, 2013

Shell script to generate the service.cfg for monitoring network device interfaces


#!/bin/bash
#Purpose : TO generate interface monitoring services.cfg
#
# Reads "<devicename> <ip>" pairs from /tmp/deviceip, walks ifDescr on each
# device over SNMP and prints one Nagios service definition per interface
# to standard output.

while read -r devicename ip _
do
    # Skip blank or incomplete rows instead of calling snmpwalk without a host.
    [ -n "$ip" ] || continue

    # The interface names are piped straight into the inner loop, so no
    # shared scratch file in /tmp is needed. stdin is taken from /dev/null so
    # snmpwalk can never consume the device list.
    # NOTE(review): the object name was spelled "IfDescr"; IF-MIB names it
    # ifDescr and net-snmp looks names up case-sensitively - confirm.
    snmpwalk -Os -c communitystring -v 1 "$ip" ifDescr < /dev/null | awk '{print $4}' |
    while read -r interfacename
    do
        [ -n "$interfacename" ] || continue

        echo "define service{
        host_name                       $devicename
        service_description             $interfacename
        use                             service
        check_command                   check_ifoperstatus!$interfacename!communitystring
}"
    done

done < /tmp/deviceip

Thursday, April 18, 2013

AIX5.3/6.1 NRPE install- pre-compiled



Monitoring AIX with Nagios

Purpose

This document describes how to monitor AIX servers using Nagios.

Overview

These instructions cover how to install and use pre-compiled binaries to monitor AIX 5.3 using NRPE.

Download Pre-Compiled Binaries

You will need to download two packages of pre-compiled binaries to your AIX server.
First download the pre-compiled Nagios plugin binaries for AIX 5.3 from the following URL:
cd /tmp
aix53_nrpe-nsca-plugins.tgz
Next download the pre-compiled NRPE binaries from the above URL as well:
AIX-5.3-nrpe-2.12-binaries.tar.gz

Create Directories

Login to your AIX server as the root user and run the following commands:
cd /usr/local
mkdir nagios
cd /var
mkdir run
cd run
touch nrpe.pid
And now change owner to nagios as
chown -R nagios.nagios ../run

Unpack Binaries

Next unpack the pre-compiled plugins and place the extracted files into the /usr/local/nagios directory using commands similar to the following:
cd /tmp
gunzip < aix53_nrpe-nsca-plugins.tgz | tar xvf -
cd /tmp/nagios
cp -R * /usr/local/nagios
Next, unpack the pre-compiled NRPE binaries using commands similar to the following:
cd /tmp
gunzip < AIX-5.3-nrpe-2.12-binaries.tar.gz | tar xvf -
cp usr/local/nagios/bin/nrpe /usr/local/nagios/bin
cp usr/local/nagios/etc/nrpe.cfg /usr/local/nagios/etc

Create Nagios User and Group

Next, create a Nagios user and group on the AIX server.
Use the following command to create a new group:
mkgroup nagios
Make a home directory for a Nagios user using the following command:
cd /users/
mkdir nagios
Next, open up smitty and add a group and new user with the name
users and nagios. Launch smitty with the following command:
smitty group
                Group Name = users                                                 


smitty user
Once smitty opens, select Add a user, and use the
following settings:
• User NAME = nagios
• Primary Group = users
• Group SET = users,nagios
• HOME directory = /users/nagios

Set Permissions

Change folder permissions so NRPE will operate properly, with the following command:
chown -R nagios.nagios /usr/local/nagios
Verify the permissions on the directory using the following commands:
cd /usr/local/nagios
ls -l

Specify NRPE Port Number

Next, edit the /etc/services file to add a port number for NRPE.
To edit the file, use the following command:
vi /etc/services
Add a line to the file that looks exactly like this:
nrpe 5666/tcp #nrpe
Save the file.

Edit nrpe.cfg

Comment out the following lines in /usr/local/nagios/etc/nrpe.cfg, as shown below:
#server_address=127.0.0.1
#allowed_hosts=127.0.0.1

Configure NRPE for Automatic Startup

Next, configure NRPE to automatically start when the AIX server reboots. To do this, use the following command:
nohup /usr/local/nagios/bin/nrpe -n -c /usr/local/nagios/etc/nrpe.cfg -d
You should get this response:
Sending nohup output to nohup.out.
Note: The -n flag specifies that the NRPE server should run without SSL support. This reduces security of the NRPE daemon, but dramatically increases performance under heavy server load and may be okay if your server is on an internal network protected by a firewall.

Start NRPE

Start NRPE using the following command:
/usr/local/nagios/bin/nrpe -n -c /usr/local/nagios/etc/nrpe.cfg -d

Test Your NRPE Configuration

Test your AIX server to see if NRPE is running properly. To do this, use the following command:
ps -ef | grep nrpe
You should see something that looks like this:
nagios 111345 43675 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -n -d
Note: The output you see may differ slightly, as the PID number will be different on your system.
Important: If you don't see any output when running the above command, it means something is wrong with your setup! This may be related to problems in your NRPE configuration file (/usr/local/nagios/etc/nrpe.cfg).

Monday, April 1, 2013

Nagios service dependency configuration generator script


#!/bin/bash
#Date : 1st April 2012
#Purpose : Nagios service dependency configuration generator script
#Authors : Babu Dhinakaran S and Ranjith Kumar R
#
# For every host found in status.dat:
#   - a host with at most one service, that service being PingCheck (or none
#     at all), is only printed to standard output;
#   - a host with two or more services gets a servicedependency definition
#     appended to $OUTPUT_FILE that makes all its other services depend on
#     PingCheck.

STATUS_FILE="/usr/local/nagios/var/status.dat"
OUTPUT_FILE="/tmp/ibsserdepdefinition.txt"

grep host_name "$STATUS_FILE" | cut -d= -f2 | sort -u |
while read -r line
do
    count=0
    check=""
    temp=""

    # Collect the distinct service descriptions that directly follow a
    # host_name line for exactly this host. Comparing the whole value keeps
    # "web1" from also picking up the services of "web10".
    while read -r service
    do
        count=$((count + 1))
        check=$service
        if [ "$service" != "PingCheck" ]
        then
            temp="$temp,$service"
        fi
    done < <(awk -F= -v host="$line" '
                 /^[[:space:]]*host_name=/ { wanted = ($2 == host); next }
                 wanted && /^[[:space:]]*service_description=/ { print $2 }
                 { wanted = 0 }
             ' "$STATUS_FILE" | sort -u)

    if [ "$count" -le 1 ]
    then
        if [ "$check" == "PingCheck" ] || [ "$check" == "" ]
        then
            echo "$line"
        fi
    else
        # remove the leading comma
        temp=${temp#,}

        echo "define servicedependency{
        host_name                       $line
        service_description             PingCheck
        dependent_host_name             $line
        dependent_service_description   $temp
        execution_failure_criteria      n
        notification_failure_criteria   u,c
       }" >> "$OUTPUT_FILE"
    fi
done
done < /tmp/servers