#!/usr/bin/perl -w

# parse file.alert mon logs and report (up|down)time of services
#
# 2003-09-03 Dobrica Pavlinusic <dpavlin@rot13.org>
# 2003-10-05 converted to CGI script
# 2004-07-18 added selection of log file
#

use strict;
use POSIX qw(strftime);
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use Data::Sorting qw(:arrays);
use Time::ParseDate;
use Time::Available;
use Cache::FileCache;

use Data::Dumper;

# directory which holds the mon log files
my $log_dir = '/var/log/mon';

# strftime formats: plain date (used for the date-limit bounds shown in the
# table header) and date+time with HTML markup (used for each event)
my $date_fmt = "%Y-%m-%d";
# plain-text alternative for $date_time_fmt: "%Y-%m-%d %H:%M:%S"
my $date_time_fmt = "<small>%a</small> <nobr>%Y-%m-%d</nobr> %H:%M:%S";

# default values for the date limit text fields
my ($from_date, $to_date) = ("now - 6 months", "now");

# default day mask (Time::Available bitmask): weekdays
my $dayMask = Time::Available::DAY_WEEKDAY;
# default working hours for the time limit text fields
my ($from_time_interval, $to_time_interval) = ("7:00", "17:00");

# debug level: raised once for an argument matching -v and once for one matching -d
my $debug = 0;
foreach my $switch ('-v', '-d') {
	$debug++ if (grep { /$switch/ } @ARGV);
}

# day mask value => label shown next to the day checkboxes
my %days = (
	Time::Available::DAY_MONDAY=>'Mo',
	Time::Available::DAY_TUESDAY=>'Tu',
	Time::Available::DAY_WEDNESDAY=>'We',
	Time::Available::DAY_THURSDAY=>'Th',
	Time::Available::DAY_FRIDAY=>'Fr',
	Time::Available::DAY_SATURDAY=>'Sa',
	Time::Available::DAY_SUNDAY=>'Su'
);

my $q = CGI->new;

# Report options. The "|| 0" puts param() in scalar context and supplies a
# defined default when the checkbox was not submitted.
my $print_orphans = $q->param('print_orphans') || 0;
my $rep_reset = $q->param('rep_reset') || 0;
# sg_filter is multi-valued (one value per ticked checkbox), so list
# context is intended here
my @sg_selected = $q->param('sg_filter');

# init misc sort parameters
#
# usort/dsort name the row field to sort by (ascending/descending). They
# are removed from the query object after being read, so that the URLs
# built later from the current query do not carry the old sort order.
my @sort_rules;
my $order;		# not used in the visible part of the script
my %sort_param;
my ($usort,$dsort);	# not used in the visible part of the script
if ($q->param('usort')) {
	$sort_param{'usort'} = $q->param('usort');
	$q->delete('usort');
	@sort_rules = ( -compare => 'numeric', $sort_param{'usort'} );
}
if ($q->param('dsort')) {
	$sort_param{'dsort'} = $q->param('dsort');
	$q->delete('dsort');
	@sort_rules = ( -compare => 'numeric', -order=>'reverse', $sort_param{'dsort'} );
}

# make interval
my $working_days;
if ($q->param('use_time_limit')) {
	# combine ticked days into one bit mask; ignore anything non-numeric
	# so that request data cannot turn this into a string operation
	$dayMask=0;
	foreach my $dm ($q->param('day_interval')) {
		next unless (defined($dm) && $dm =~ /^\d+$/);
		$dayMask |= $dm + 0;
	}

	# Fetch the interval bounds in scalar context: in list context a missing
	# parameter yields an empty list and a repeated one several values, and
	# either would shift the named arguments of the constructor below.
	my $start = $q->param('from_time_interval');
	my $end = $q->param('to_time_interval');
	$start = $from_time_interval unless (defined($start) && $start ne '');
	$end = $to_time_interval unless (defined($end) && $end ne '');

	$working_days = Time::Available->new(start=>$start, end=>$end, dayMask=>$dayMask);
}

# init cache and setup expiration
my $cache = Cache::FileCache->new({ default_expires_in => '10 min' });

#
# This option (the rep_reset CGI parameter; formerly the -r command switch)
# will reset failure duration if a repeated failure on the same group/service
# happened.
# If you want honest reporting (or grouped only by group and service),
# you shouldn't turn it on :-) However, if you have just failure events in your
# log, this will produce output which will show duration BETWEEN two failures
#

# Format a unix timestamp for display.
#
# Takes the timestamp as the only argument and returns it formatted in
# local time with $date_time_fmt. A false timestamp (undef, 0, '') gives "?".
# When $debug is set, the raw value is appended via the "%s" conversion.
sub d {
	my $utime = shift;
	return "?" unless ($utime);

	my $fmt = $date_time_fmt;
	$fmt .= " <tt>[%s]</tt>" if ($debug);

	return strftime($fmt, localtime($utime));
}
# Format a duration given in seconds as "[-][Nd ]HH:MM:SS".
#
# Takes the duration as the only argument; a false value (undef, 0, '')
# gives "0". The day part is only printed when it is at least one day.
# Negative durations (e.g. from out-of-order log timestamps) are formatted
# from their absolute value with a leading "-"; Perl's % takes the sign of
# the right operand, so splitting a negative number directly would produce
# a bogus positive time. Fractions of a second are dropped.
# When $debug is set, the raw number of seconds is appended.
sub dur {
	my $dur = shift || return "0";

	# split whole, non-negative seconds into days/hours/minutes/seconds
	my $s = int(abs($dur));
	my $sign = ($dur < 0 && $s > 0) ? "-" : "";
	my $d = int($s/(24*60*60));
	$s = $s % (24*60*60);
	my $h = int($s/(60*60));
	$s = $s % (60*60);
	my $m = int($s/60);
	$s = $s % 60;

	my $out = $sign;
	$out .= $d."d " if ($d > 0);
	if ($debug) {
		$out .= sprintf("%02d:%02d:%02d <tt>[%d]</tt>",$h,$m,$s, $dur);
	} else {
		$out .= sprintf("%02d:%02d:%02d",$h,$m,$s);
	}

	return $out;
}

# read log and calculate
#
# Builds two structures, either from the cache or by scanning the log:
#   $data      - arrayref of report rows: { sg, from, to, dur, desc }
#   $sg_filter - hashref "group/service" => number of counted events,
#                used to offer the service/group checkboxes

my %fail;	# "group/service" => unix time of its currently open failure
my $sg_filter;	# filter for service/group

# Regular *.log files in $log_dir. This list feeds the log selection menu
# and is also the whitelist the requested file name is checked against.
opendir(my $dir_h, $log_dir) || die "can't opendir $log_dir: $!";
my @logs = sort grep { /\.log$/i && -f "$log_dir/$_" } readdir($dir_h);
closedir $dir_h;

# The log name comes from the request, so it must never be used to build
# a path unless it is exactly one of the files found above (this rules out
# "../" traversal and open() mode characters).
my $log_name = $q->param('log_file');
$log_name = '' unless (defined($log_name));
if ($log_name ne '') {
	my $known = grep { $_ eq $log_name } @logs;
	die "requested log file is not one of the logs in $log_dir\n" unless ($known);
}

my $log_file = $log_dir . '/' . $log_name;

my $data;

# generate unique key for this data and options
my $cache_key=$log_file.join("|",@sg_selected)."|".$print_orphans."|".$rep_reset;

# debug disables cache
if (! $debug) {
	$data = $cache->get( $cache_key );
	$sg_filter = $cache->get("sg_filter $cache_key");
}

# nothing to read until a log file has been selected
if ($log_name ne '' && (!$data || !$sg_filter)) {

	# Selected service/group names are compared exactly. Group and service
	# come from the log and may contain regex metacharacters, and a pattern
	# match would also let "web/http" match a selected "web/https".
	my %selected = map { $_ => 1 } @sg_selected;

	open(my $log_fh, '<', $log_file) || die "$log_file: $!";

	while (my $line = <$log_fh>) {
		chomp $line;

		# status group service unixtime (date text) description
		my ($status,$group,$service,$utime,undef,$desc) =
			$line =~ /^(failure|up)\s+(\S+)\s+(\S+)\s+(\d+)\s+\(([^)]+)\)\s+(.+)$/
			or next;

		my $id = "$group/$service";
		my $wanted = $selected{$id};

		if ($status eq "up" && defined($fail{$id})) {
			# service came back: close the open failure
			if ($wanted) {
				push @$data, {
					'sg'=>$id,
					'from'=>$fail{$id},
					'to'=>$utime,
					'dur'=>($utime-$fail{$id}),
					'desc'=>$desc };
			}
			$sg_filter->{$id}++;
			delete $fail{$id};
		} elsif ($status eq "up") {
			# "up" without a preceding failure (orphan); from is marked as -1
			if ($print_orphans && $wanted) {
				push @$data, {
					'sg'=>$id,
					'from'=>-1,
					'to'=>$utime,
					'dur'=>0,
					'desc'=>$desc };
			}
			$sg_filter->{$id}++;
		} elsif (defined($fail{$id})) {
			# failure while a failure is already open: only with rep_reset
			# is it reported on its own and the start of the failure moved
			if ($rep_reset && $wanted) {
				push @$data, {
					'sg'=>$id,
					'from'=>$fail{$id},
					'to'=>$utime,
					'dur'=>($utime-$fail{$id}),
					'desc'=>'[failure again]'};
				$fail{$id} = $utime;
			}
			$sg_filter->{$id}++;
		} else {
			# first failure: remember when it started
			$fail{$id} = $utime;
		}
	}
	close($log_fh);

	$cache->set($cache_key, $data);
	$cache->set("sg_filter $cache_key", $sg_filter);

}

# generate output
#
print $q->header,$q->start_html("mon availiability report");

# make some filters
#

# the form always starts with the log file selection menu
print $q->start_form,'
	<table border=0 cellspacing=0 cellpadding=2>
	<tr><td valign="top">
	Log file: ',$q->popup_menu(-name=>'log_file', -values=>\@logs);

# no log selected yet: offer only the selection and stop
if (! $q->param('log_file')) {
	print
	'<br>',
	$q->submit(-name=>'show',-value=>'Select this log'),
	$q->end_form,
	'</td></tr></table>',
	$q->end_html;
	exit;
}

# Service/group names found in the log, sorted so that the checkboxes are
# shown in the same order on every request (hash key order is not stable).
my @sg_names = sort keys %{ $sg_filter || {} };

# rest of the form: service/group filter on the left, report options
# (repeated failures, orphans, date limit, time limit, days) on the right
print '<br>
	<em>Show just service/group:</em><br>
	',$q->checkbox_group(-name=>'sg_filter',
		-values=>[@sg_names],
		-default=>[@sg_names],
		-linebreak=>'true',
		),'
	</td><td valign="top">
	<em>Other options:</em><br>',
	$q->checkbox(-name=>'rep_reset',-checked=>0,
	-label=>"show repeated failures on same service as individual failures"),'<br>',
	$q->checkbox(-name=>'print_orphans',-checked=>0,
	-label=>"show records which are not complete in this interval"),'<br>',
	$q->checkbox(-name=>'use_date_limit',-checked=>1,
	-label=>"use date limit from:"),
	$q->textfield(-name=>'from_date',-size=>20,-default=>$from_date),' to: ',
	$q->textfield(-name=>'to_date',-size=>20,-default=>$to_date),'
	<small>Using <a href="http://search.cpan.org/search?mode=module&query=Time::ParseDate">Time::ParseDate</a></small>
	<br>
	',$q->checkbox(-name=>'use_time_limit',-checked=>1, -value=>'on',
	-label=>"use time limit for each day:"),
	$q->textfield(-name=>'from_time_interval',-size=>8,-default=>$from_time_interval),' to: ',
	$q->textfield(-name=>'to_time_interval',-size=>8,-default=>$to_time_interval),
	'<br>Days: ',
	$q->checkbox_group(-name=>'day_interval',
		-values=>[ sort { $a <=> $b } keys %days ],
		-labels=>\%days,
		-defaults=>[
			Time::Available::DAY_MONDAY,
			Time::Available::DAY_TUESDAY,
			Time::Available::DAY_WEDNESDAY,
			Time::Available::DAY_THURSDAY,
			Time::Available::DAY_FRIDAY,
		]
	),
	'<br>',$q->checkbox(-name=>'hide_zero_int',
	-label=>'hide intervals with 0 time'),'<br>',
	$q->submit(-name=>'show',-value=>'Show report'),'
	</td></tr>
	</table>
	',$q->end_form;

# bail out of no data
if (!defined($data) || scalar @$data < 1) {
	print $q->end_html;
	exit;
}

# dump report
#

# sort direction ('u' = ascending, 'd' = descending) => arrow shown in header
my %dir_html_entity = (
#	'u' => '&uArr;',
#	'd' => '&dArr;'
	'u' => '&#9650;',
	'd' => '&#9660;',
);

# Return the HTML for one sort arrow of a table column.
#
# Arguments: query object, column (row field name), direction ('u' or 'd',
# case insensitive). Returns nothing if an argument is missing or the
# direction is unknown. If the report is already sorted by that column in
# that direction (see %sort_param) the bare arrow is returned, otherwise
# the arrow is wrapped in a link to the current query plus "<dir>sort=<col>".
sub sort_link {
	my $q = shift || return;
	my $col = shift || return;
	my $dir = shift || return;
	$dir = lc($dir);

	my $arrow = $dir_html_entity{$dir};
	return unless (defined($arrow));

	my $active = $sort_param{$dir.'sort'};
	return $arrow if ($active && $active eq $col);

	# start the query string if the URL has none yet, otherwise extend it
	my $url = $q->url(-query=>1);
	$url .= ($url =~ /\?/ ? '&' : '?') . $dir.'sort='.$col;

	# the URL goes into an attribute value, so "&" and quotes must be escaped
	return '<a href="'.$q->escapeHTML($url).'">'.$arrow.'</a>';
}


# Date limit: $from_time/$to_time are unix times, $from_html/$to_html the
# same bounds formatted for the table header. All four stay undefined when
# the date limit is switched off.
my ($from_time,$to_time,$from_html,$to_html);
if ($q->param('use_date_limit')) {
	# Read the fields in scalar context (in list context a missing field
	# would shift the UK=>1 option into the date argument) and fall back to
	# the defaults when a field is empty or cannot be parsed, so the bounds
	# are always defined while the limit is on.
	my $from_input = $q->param('from_date');
	my $to_input = $q->param('to_date');
	$from_input = $from_date unless (defined($from_input) && $from_input =~ /\S/);
	$to_input = $to_date unless (defined($to_input) && $to_input =~ /\S/);

	$from_time = parsedate($from_input, UK=>1);
	$from_time = parsedate($from_date, UK=>1) unless (defined($from_time));
	$to_time = parsedate($to_input, UK=>1);
	$to_time = parsedate($to_date, UK=>1) unless (defined($to_time));

	$from_html = strftime($date_fmt,localtime($from_time));
	$to_html = strftime($date_fmt,localtime($to_time));
	$from_html .= " [$from_time] " if ($debug);
	$to_html .= " [$to_time] " if ($debug);
}

# sort data
#
my @sorted = sorted_array( @$data, @sort_rules );

print "-- sort: ",Dumper(@sort_rules)," (data: ".@$data." sorted: ".@sorted.") --\n<br>-- dayMask: $dayMask --\n<br>-- cache_key: $cache_key --\n<br>" if ($debug);

# table header; from/to/duration columns get an up and a down sort arrow,
# from/to additionally show the date limit in use
print '<table border=1 cellspacing=0 cellpadding=2 width="100%">
	<tr>
	<th>group/service</th><th bgcolor="#f0f0f0"><nobr>',
	sort_link($q,'from','u'),' from ',sort_link($q,'from','d'),'</nobr>';
print '<br>',$from_html if ($from_html);
print '</th><th><nobr>',
	sort_link($q,'to','u'),' to ',sort_link($q,'to','d'),'</nobr>';
print '<br>',$to_html if ($to_html);
print '</th><th bgcolor="#e0e0e0"><nobr>',
	sort_link($q,'dur','u'),' duration ',sort_link($q,'dur','d'),'</nobr>
	</th><th>description</th>
	</tr>';

# per "group/service" accumulators, filled while the rows are printed
my $downtime;		# total downtime
my $downinterval;	# total downtime in time interval
my $sg_count;		# count number of downtimes

# form switches are the same for every row, so read them once
my $use_date_limit = $q->param('use_date_limit');
my $hide_zero_int = $q->param('hide_zero_int');

foreach my $row (@sorted) {
	# rows pushed for an "up" without a known failure carry from == -1
	my $orphan = ($row->{from} == -1);

	if ($use_date_limit) {
		# An orphan has no real start time, so only its end is checked;
		# comparing -1 with the lower bound would always drop it.
		my $start = $orphan ? $row->{to} : $row->{from};
		next if (defined($from_time) && $start < $from_time);
		next if (defined($to_time) && $row->{to} > $to_time);
	}

	my ($from,$dur) = ('unknown','unknown');
	my $int;	# downtime inside the working interval, if one is in use

	if (! $orphan) {
		$from = d($row->{from});
		my $total = $row->{to} - $row->{from};
		$downtime->{$row->{sg}} += $total;
		if ($working_days) {
			$int = $working_days->interval($row->{from},$row->{to});
			$dur = dur($int)."<br><nobr><small>&sum; ".dur($total)."</small></nobr>";
			$downinterval->{$row->{sg}} += $int;
		} else {
			$dur = dur($total);
		}
	}
	$sg_count->{$row->{sg}}++;

	# Hidden rows are still counted in the totals above. Without a time
	# limit there is no interval to test (and no $working_days object to
	# call), so nothing is hidden then.
	next if ($hide_zero_int && defined($int) && $int == 0);

	# sg and desc come from the log file and are escaped; $from and $dur
	# hold markup generated by this script
	print '<tr>
		<td align="left" valign="center">',$q->escapeHTML($row->{sg}),'</td>
		<td align="right" bgcolor="#f0f0f0">',$from,'</td>
		<td align="right">',d($row->{to}),'</td>
		<td align="center" bgcolor="#e0e0e0">',$dur,'</td>
		<td align="left">',$q->escapeHTML($row->{desc}),'</td>
		</tr>';
}

# dump totals
#
# One row per service/group that accumulated downtime: the summed duration
# (inside the working interval and overall), the number of failures and,
# when both a date limit and a time limit are in use, the availability in
# percent. Rows are sorted by name so the order is the same on every request.

my $dur;
foreach my $sg (sort keys %{ $downtime || {} }) {
	my $down_total = $downtime->{$sg} || 0;
	my $down_int = ($downinterval && $downinterval->{$sg}) || 0;

	if ($down_int) {
		$dur=dur($down_int)."<br><nobr><small>&sum; ".dur($down_total)."</small></nobr>";
	} else {
		$dur=dur($down_total);
	}

	my ($pcnt_interval,$pcnt_total) = ('','');

	if ($from_time && $to_time && $working_days) {
		my $int = $working_days->interval($from_time,$to_time);
		my $tot = $to_time - $from_time;

		if ($debug) {
			$pcnt_interval .= "[<tt>$down_int $int</tt>]";
			$pcnt_total .= "[<tt>$down_total $tot</tt>]";
		}
		# An empty reporting period (e.g. from == to, or no working time
		# inside it) has no meaningful percentage and would divide by zero.
		$pcnt_interval .= sprintf("%.2f%% availability in interval", 100-($down_int*100 / $int)) if ($int && $int > 0);
		$pcnt_total .= sprintf("%.2f%% availability total", 100-($down_total*100 / $tot)) if ($tot > 0);
	}
	
	# $sg comes from the log file, so it is escaped
	print '<tr><td colspan=3 align="right">total for ',$q->escapeHTML($sg),'</td>
		<td bgcolor="#e0e0e0" align="right">',$dur,'</td>
		<td><small>
			'.$pcnt_interval.'
			(',$sg_count->{$sg},' failures)
			<br>'.$pcnt_total.'
			</small></td>
		</tr>';
}

print "</table>",
	$q->end_html;


