Friday, August 12, 2011

Nagios Check for Scheduled TSM Backups (Updated)

I have cleaned up the code from my previous post, Nagios Check for Scheduled TSM Backups, which included some superfluous code derived from an earlier post, Audit Tivoli Storage Manager Backup Client Schedules on Windows Servers. The updated code is more efficient and returns the service status faster after an extended period of "deafness" between the client and the TSM server and the subsequent restart of the TSM service.
# $tsmFile - optional schedule log file name. When it is empty or the literal "$", the script
#            looks the log path up through Get-TSMInformation; otherwise the name is appended
#            to "<ProgramFiles>\Tivoli\TSM\baclient\".
#            NOTE(review): the literal "$" is presumably what an unset Nagios argument macro
#            arrives as - confirm against the calling command definition.
param([string]$tsmFile)
#--------------------------------------------------------------------------------------------------#
# Collects one record per TSM scheduler service found in the registry.
#
# The value names under the TSM "Scheduler Service" key are treated as service names. Every key
# under HKLM:\SYSTEM\CurrentControlSet\Services whose leaf name matches one of them yields a
# PSObject with these note properties:
#   tsmServiceName - leaf name of the service key
#   startValue     - the service key's "Start" value
#   clientNodeName - "clientNodeName" value of the service's Parameters subkey ($null if absent)
#   scheduleLog    - "scheduleLog" value of the service's Parameters subkey ($null if absent)
#   status         - Status reported by Get-Service for that service name
#
# Returns the collected records; nothing is emitted when the TSM key does not exist or no
# service matches.
Function Get-TSMInformation {
 $tsmInformation = @()
 $schedulerServices = "HKLM:\SOFTWARE\IBM\ADSM\CurrentVersion\BackupClient\Scheduler Service"
 $windowsServices = "HKLM:\SYSTEM\CurrentControlSet\Services"
 if(Test-Path -path $schedulerServices) {
  $tsmServices = @(Get-Item $schedulerServices | ForEach-Object { $_.Property })
  foreach($windowsService in Get-ChildItem $windowsServices) {
   $tsmServiceName = Split-Path -leaf $windowsService.Name
   if($tsmServices -contains $tsmServiceName) {
    $startValue = $windowsService.GetValue("Start")
    # Reset per service so a missing Parameters subkey can never leak the values
    # read for the previously processed service into this record.
    $clientNodeName = $null
    $scheduleLog = $null
    $parametersKey = $windowsService.OpenSubKey("Parameters")
    if($parametersKey -ne $null) {
     try {
      $clientNodeName = $parametersKey.GetValue("clientNodeName")
      $scheduleLog = $parametersKey.GetValue("scheduleLog")
     } finally {
      # The subkey handle is opened by us, so release it deterministically.
      $parametersKey.Close()
     }
    }
    $currentStatus = (Get-Service -name $tsmServiceName).Status
    $clientNodeInformation = New-Object -typeName PSObject
    Add-Member -inputObject $clientNodeInformation -type NoteProperty -name "tsmServiceName" -value $tsmServiceName
    Add-Member -inputObject $clientNodeInformation -type NoteProperty -name "startValue" -value $startValue
    Add-Member -inputObject $clientNodeInformation -type NoteProperty -name "clientNodeName" -value $clientNodeName
    Add-Member -inputObject $clientNodeInformation -type NoteProperty -name "scheduleLog" -value $scheduleLog
    Add-Member -inputObject $clientNodeInformation -type NoteProperty -name "status" -value $currentStatus
    $tsmInformation += $clientNodeInformation
   }
  }
 }
 return $tsmInformation
}
#--------------------------------------------------------------------------------------------------#
# Returns the last lines of a CRLF-delimited text file by reading it backwards in byte chunks,
# so the whole file never has to be loaded.
#
#   $fileName           - path of the file to tail
#   $totalNumberOfLines - maximum number of lines to return (values below 1 are treated as 1)
#   $byteChunk          - number of bytes fetched per read (values <= 0 fall back to 10240)
#
# Bytes are decoded as ASCII. Whatever follows the final CRLF (an empty string for a file that
# ends with CRLF, or an unterminated line) is dropped whenever more than one element was
# collected. If the file does not exist, the single element "[ERROR] <fileName> not found" is
# returned instead.
Function Read-EndOfFileByByteChunk($fileName,$totalNumberOfLines,$byteChunk) {
 if($totalNumberOfLines -lt 1) { $totalNumberOfLines = 1 }
 if($byteChunk -le 0) { $byteChunk = 10240 }
 $linesOfText = New-Object System.Collections.ArrayList
 if([System.IO.File]::Exists($fileName)) {
  $fileStream = New-Object System.IO.FileStream($fileName,[System.IO.FileMode]::Open,[System.IO.FileAccess]::Read,[System.IO.FileShare]::ReadWrite)
  try {
   $asciiEncoding = New-Object System.Text.ASCIIEncoding
   # Snapshot of the length; every position below is computed against it so that a writer
   # appending to the file while we read cannot shift the read windows.
   $fileSize = $fileStream.Length
   $byteOffset = $byteChunk          # distance from the (snapshot) end of file to the window start
   [byte[]] $bytesRead = New-Object byte[] $byteChunk
   $totalBytesProcessed = 0          # bytes at the end of the file that are already split into lines
   $lastReadAttempt = $false
   do {
    if($byteOffset -ge $fileSize) {
     # The window reaches the start of the file: read exactly what is still unprocessed.
     $byteChunk = $fileSize - $totalBytesProcessed
     [byte[]] $bytesRead = New-Object byte[] $byteChunk
     $byteOffset = $fileSize
     $lastReadAttempt = $true
    }
    $fileStream.Seek(($fileSize - $byteOffset), [System.IO.SeekOrigin]::Begin) | Out-Null
    $fileStream.Read($bytesRead, 0, $byteChunk) | Out-Null
    $chunkOfText = New-Object System.Collections.ArrayList
    $chunkOfText.AddRange(([System.Text.RegularExpressions.Regex]::Split($asciiEncoding.GetString($bytesRead),"\r\n")))
    if($lastReadAttempt -eq $false -and $chunkOfText.Count -eq 1) {
     # No line break inside this window (a line longer than the chunk). Re-reading the same
     # window would never make progress, so widen it backwards - keeping its end fixed - and
     # try again.
     $byteOffset = $byteOffset + $byteChunk
     $byteChunk = 2 * $byteChunk
     [byte[]] $bytesRead = New-Object byte[] $byteChunk
     continue
    }
    $firstLineLength = ($chunkOfText[0].Length)
    # The next window ends right after this window's first (possibly partial) line, so that
    # line is read again in full by the next iteration.
    $byteOffset = ($byteOffset + $byteChunk) - ($firstLineLength)
    if($lastReadAttempt -eq $false) {
     # Always discard the leading fragment: it will be re-read completely, and keeping it
     # would return a truncated or duplicated line.
     $chunkOfText.RemoveAt(0)
    }
    $totalBytesProcessed += ($byteChunk - $firstLineLength)
    $linesOfText.InsertRange(0, $chunkOfText)
   } while($totalNumberOfLines -ge $linesOfText.count -and $lastReadAttempt -eq $false -and $totalBytesProcessed -lt $fileSize)
  } finally {
   # Release the handle even when a seek/read throws.
   $fileStream.Close()
  }
  # Drop the element that follows the final CRLF.
  if($linesOfText.count -gt 1) {
   $linesOfText.RemoveAt($linesOfText.count-1)
  }
  # Trim surplus lines from the front so at most $totalNumberOfLines remain.
  $deltaLines = ($linesOfText.count - $totalNumberOfLines)
  if($deltaLines -gt 0) {
   $linesOfText.RemoveRange(0, $deltaLines)
  }
 } else {
  $linesOfText.Add("[ERROR] $fileName not found") | Out-Null
 }
 return $linesOfText
}
#--------------------------------------------------------------------------------------------------#
# Exit codes reported by the check
Set-Variable returnNormal 0 -option Constant
Set-Variable returnWarning 1 -option Constant
Set-Variable returnError 2 -option Constant
Set-Variable returnUnknown 3 -option Constant
# "<HostName>.<DomainName>" as reported by the local IP global properties
Set-Variable computerFqdn ([System.Net.NetworkInformation.IPGlobalProperties]::GetIPGlobalProperties() | ForEach-Object { $_.HostName + "." + $_.DomainName }) -option Constant
# Tunables
Set-Variable backupWindow 24 -option Constant # in Hours
Set-Variable deafService 36 -option Constant # in Hours
Set-Variable enableRestarts $true -option Constant # Intended switch for letting the check (re)start the TSM service
Set-Variable lookBack 250 -option Constant # Number of lines to tail
Set-Variable maximumFailures 5 -option Constant # Your tolerance for failed files
# Mutable state, filled in while the log is evaluated
$successfulBackup = $false
$todaysBackupFound = $false
$backupStillRunning = $false
$completionTime = $null
$totalFailed = 0
$logEntries = @()
# Pessimistic defaults, reported if no branch below replaces them
$exitMessage = "Massive Script Failure"
$exitValue = $returnError
#--------------------------------------------------------------------------------------------------#
# --- Locate the schedule log -----------------------------------------------------------------
# Without a usable $tsmFile argument the first scheduler service whose scheduleLog path
# matches "\dsmsched.log" is used; otherwise the given file name is looked up in the default
# baclient directory.
$tsmLogFile = $null
$lastLine = $null        # last log entry that starts with a parsable timestamp
$lastLineTime = $null    # timestamp of $lastLine
if($tsmFile -eq "$" -or (!$tsmFile)) {
 $tsmInfo = @(Get-TSMInformation)
 foreach($tsmInstance in $tsmInfo) {
  if($tsmInstance.scheduleLog -match "\\dsmsched.log") {
   $tsmLogFile = $tsmInstance.scheduleLog
   break
  }
 }
} else {
 $tsmLogFile = ($env:programfiles + "\Tivoli\TSM\baclient\$tsmFile")
}

if(-not $tsmLogFile) {
 # Nothing was discovered; Test-Path must not be called with an empty path.
 $exitMessage = "Unable to locate a TSM schedule log for any TSM scheduler service"
 $exitValue = $returnError
} elseif(-not (Test-Path -path $tsmLogFile)) {
 $exitMessage = "TSM schedule log $tsmLogFile not found"
 $exitValue = $returnError
} else {
 # --- Evaluate the tail of the log ------------------------------------------------------------
 $now = Get-Date
 $logEntries = @(Read-EndOfFileByByteChunk $tsmLogFile $lookBack 1280)

 foreach($logEntry in $logEntries) {
  # Only lines that begin with a 19-character timestamp are considered.
  if($logEntry.Length -lt 19) { continue }
  $dateTest = $logEntry.SubString(0,19) -as [DateTime]
  if(-not $dateTest) { continue }
  $entryTime = Get-Date $logEntry.SubString(0,19)
  if(($now - $entryTime).TotalHours -le $backupWindow) {
   if($logEntry -match "Scheduled event '(.*?)' completed successfully.") {
    $successfulBackup = $true
    $completionTime = $entryTime
   }
   if($logEntry -match "Total number of objects failed:") {
    [int]$totalFailed = ($logEntry -Replace "(.*)Total number of objects failed:", "").Trim()
   }
   $todaysBackupFound = $true
  }
  $lastLine = $logEntry
  $lastLineTime = $entryTime
 }

 if($lastLine -eq $null) {
  # No timestamped entry at all: nothing below can be evaluated.
  $exitMessage = "Unable to find any timestamped entries in the last $lookBack lines of $tsmLogFile"
  $exitValue = $returnError
 } else {
  # Activity inside the window but no completion yet: treat the backup as still running
  # while the newest entry is at most 15 minutes old.
  if($successfulBackup -eq $false -and $todaysBackupFound -eq $true) {
   if(($now - $lastLineTime).TotalMinutes -le 15) {
    $backupStillRunning = $true
   }
  }

  if($todaysBackupFound -eq $false) {
   if(($now - $lastLineTime).TotalHours -ge $deafService) {
    # The scheduler has been silent for too long; find the service that owns this log.
    $deafInstance = $null
    foreach($tsmInstance in @(Get-TSMInformation)) {
     if($tsmInstance.scheduleLog -eq $tsmLogFile) {
      $deafInstance = $tsmInstance
      break
     }
    }
    if($deafInstance -eq $null) {
     $exitMessage = ("Unable to determine which service is associated to $tsmLogFile")
    } elseif($enableRestarts -ne $true) {
     $exitMessage = ("TSM Scheduler `"" + $deafInstance.tsmServiceName + "`" has not contacted the TSM server in $deafService hours. Automatic restarts are disabled.")
    } elseif($deafInstance.status -eq "Running") {
     Restart-Service -name $deafInstance.tsmServiceName
     $exitMessage = ("TSM Scheduler `"" + $deafInstance.tsmServiceName + "`" has not contacted the TSM server in $deafService hours. Restarting service.")
    } else {
     Start-Service -name $deafInstance.tsmServiceName
     $exitMessage = ("TSM Scheduler `"" + $deafInstance.tsmServiceName + "`" was stopped and hasn't contacted the TSM server in $deafService hours. Starting service.")
    }
    $exitValue = $returnError
   } else {
    $exitMessage = ("Unable to find data in the last $backupWindow hours in $tsmLogFile. Last Backup log date: " + $lastLineTime)
    $exitValue = $returnError
   }
  } elseif($totalFailed -gt $maximumFailures) {
   $exitMessage = "Backup completed with $totalFailed failed objects."
   $exitValue = $returnWarning
  } elseif($successfulBackup -eq $true) {
   $exitMessage = "Backup completed successfully: $completionTime"
   $exitValue = $returnNormal
  } elseif($backupStillRunning -eq $true) {
   # Backslashes are turned into forward slashes in the reported status line.
   $exitMessage = (("Backup still running! Please allow to complete. Current status: " + $lastLine) -Replace "\\","/")
   $exitValue = $returnWarning
  } else {
   $exitMessage = (("Unable to find a successful backup. Last status: " + $lastLine) -Replace "\\","/")
   $exitValue = $returnError
  }
 }
}

# Report the result text and hand the status code to the host as the exit code.
Write-Host $exitMessage
$Host.SetShouldExit($exitValue)

2 comments:

  1. Thank you for this great script! It works really good for me!

    Frank

    ReplyDelete
  2. Cool Script, thx for publishing!

    One note though: enableRestarts variable is not checked in the Service Restart section

    ReplyDelete