Project

General

Profile

Download (131 KB) Statistics
| Branch: | Revision:
1
#!/usr/bin/perl
2
#
3
# mon - schedules service tests and triggers alerts upon failures
4
#
5
# Jim Trocki, trockij@arctic.org
6
#
7
# $Id: mon.pl,v 1.1 2012-10-23 19:57:32 cabo Exp $
8
#
9
# Copyright (C) 1998 Jim Trocki
10
#
11
#    This program is free software; you can redistribute it and/or modify
12
#    it under the terms of the GNU General Public License as published by
13
#    the Free Software Foundation; either version 2 of the License, or
14
#    (at your option) any later version.
15
#
16
#    This program is distributed in the hope that it will be useful,
17
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
#    GNU General Public License for more details.
20
#
21
#    You should have received a copy of the GNU General Public License
22
#    along with this program; if not, write to the Free Software
23
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24
#
25
#
26
use strict;
27

    
28
my $RCSID='$Id: mon.pl,v 1.1 2012-10-23 19:57:32 cabo Exp $';
29
my $AUTHOR='trockij@arctic.org';
30
my $RELEASE='$Name:  $';
31

    
32
#
33
# NetBSD rc.d script compatibility
34
#
35
$0= "mon" . " " . join(" ", @ARGV) if $^O eq "netbsd";
36

    
37
#
38
# modules in the perl distribution
39
#
40
use Getopt::Long qw(:config no_ignore_case);
41
use Text::ParseWords;
42
use POSIX;
43
use Fcntl;
44
use Socket;
45
use Sys::Hostname;
46
use Sys::Syslog qw(:DEFAULT);
47
use FileHandle;
48

    
49
use Data::Dumper;
50

    
51
#
52
# CPAN modules
53
#
54
use Time::HiRes qw(gettimeofday tv_interval usleep);
55
use Time::Period;
56

    
57
sub auth;
58
sub call_alert;
59
sub check_auth;
60
sub clear_timers;
61
sub client_accept;
62
sub client_close;
63
sub client_command;
64
sub client_dopending;
65
sub client_write_opstatus;
66
sub collect_output;
67
sub daemon;
68
sub debug;
69
sub debug_dir;
70
sub dep_ok;
71
sub dep_summary;
72
sub depend;
73
sub dhmstos;
74
sub die_die;
75
sub disen_host;
76
sub disen_service;
77
sub disen_watch;
78
sub do_alert;
79
sub do_startup_alerts;
80
sub err_startup;
81
sub esc_str;
82
sub gen_scriptdir_hash;
83
sub handle_io;
84
sub handle_trap;
85
sub handle_trap_timeout;
86
sub host_exists;
87
sub host_singleton_group;
88
sub inRange;
89
sub init_cf_globals;
90
sub init_globals;
91
sub load_auth;
92
sub load_state;
93
sub normalize_paths;
94
sub mysystem;
95
sub init_dtlog;
96
sub pam_conv_func;
97
sub proc_cleanup;
98
sub process_event;
99
sub randomize_startdelay;
100
sub read_cf;
101
sub readhistoricfile;
102
sub reload;
103
sub remove_proc;
104
sub reset_server;
105
sub run_monitor;
106
sub save_state;
107
sub set_last_test;
108
sub set_op_status;
109
sub reset_timer;
110
sub setup_server;
111
sub sock_write;
112
sub syslog_die;
113
sub un_esc_str;
114
sub usage;
115
sub write_dtlog;
116

    
117
#
118
# globals
119
#
120
my %opt;		# cmdline arguments
121
my %CF;			# configuration directives
122
my $PWD;		# current working directory
123
my $HOSTNAME;		# system hostname
124
my $STOPPED;		# 1 = scheduler stopped, 0 = not stopped
125
my $STOPPED_TIME;	# time(2) scheduler was stopped, if stopped
126
my $SLEEPINT;		# don't touch
127
my %watch_disabled;	# watches disabled, indexed by watch
128
my %watch;		# main configuration file data structure
129
my %alias;		# aliases
130
my %groups;		# hostgroups, indexed by group
131
my %views;		# view lists, indexed by name
132
my %view_users;         # view preferences, per user
133

    
134
#
135
# I/O routine globals
136
#
137
my %clients;		# fds of connected clients
138
my $numclients;		# count of connected clients
139
my %running;		# procs which are forked and running,
140
			# indexed by group/service
141
my $iovec;		# used for select loop
142
my %runningpid;		# procs which are forked and running,
143
			# indexed by PID
144
my $procs;		# number of outstanding procs
145
my %fhandles;		# input file handles of children
146
my %ibufs;		# buffer structure to hold data from children
147
my ($fdset_rbits, $fdset_ebits);
148

    
149
#
150
# history globals
151
#
152
my @last_alerts;	# alert history, in memory
153
my @last_failures;	# failure history, in memory
154

    
155
#
156
# misc. globals
157
#
158
my $i;			# loop iteration counter, used for debugging only
159
my $lasttm;		# the last time(2) the mon loop started
160
my $pid_file_owner;	# set when creating pid file
161
my $tm;			# used in main loop
162

    
163
#
164
# authentication structure globals
165
#
166
my %AUTHCMDS;
167
my %NOAUTHCMDS;
168
my %AUTHTRAPS;
169

    
170
#
171
# PAM authentication globals (must not be lexically scoped)
172
#
173
use vars qw ( $PAM_username $PAM_password ) ;
174

    
175

    
176
#
177
# opstatus globals
178
#
179
my (%OPSTAT, %FAILURE, %SUCCESS, %WARNING);	# operational statuses
180
my ($TRAP_COLDSTART, $TRAP_WARMSTART,		# trap types
181
	$TRAP_LINKDOWN, $TRAP_LINKUP,
182
	$TRAP_AUTHFAIL, $TRAP_EGPNEIGHBORLOSS,
183
	$TRAP_ENTERPRISE, $TRAP_HEARTBEAT);
184

    
185
my ($STAT_FAIL, $STAT_OK, $STAT_COLDSTART,	# _op_status values
186
	$STAT_WARMSTART, $STAT_LINKDOWN,
187
	$STAT_UNKNOWN, $STAT_TIMEOUT,
188
	$STAT_UNTESTED, $STAT_DEPEND, $STAT_WARN);
189

    
190
my ($FL_MONITOR, $FL_UPALERT,			# alert type flags
191
	$FL_TRAP, $FL_TRAPTIMEOUT,
192
	$FL_STARTUPALERT, $FL_TEST, $FL_REDISTRIBUTE,
193
        $FL_ACKALERT, $FL_DISABLEALERT);
194

    
195
my $TRAP_PDU;
196
my (%ALERTHASH, %MONITORHASH);			# hash of pathnames for
197
						# alerts/monitors
198
my $PROT_VERSION;
199
my $START_TIME;					# time(2) server started
200
my $TRAP_PRO_VERSION;				# trap protocol version
201
my $DEP_EVAL_SANDBOX;				# perl environment for
202
						# dep evals
203

    
204
#
205
# argument parsing
206
#
207
my $getopt_result = GetOptions(\%opt,
208
			       qw/
209
				  A|authfile=s
210
				  B|cfbasedir=s
211
				  D|statedir=s
212
				  L|logdir=s
213
				  M|m4:s
214
				  O|syslogfacility=s
215
				  P|pidfile=s
216
				  S|stopped
217
				  a|alertdir=s
218
				  b|basedir=s
219
				  c|configfile=s
220
				  d|debug+
221
				  f|fork
222
				  h|help
223
				  i|sleep=i
224
				  k|maxkeep=i
225
				  l|loadstate:s
226
				  m|maxprocs=i
227
				  p|port=i
228
				  r|randstart=s
229
				  s|scriptdir=s
230
				  t|trapport=i
231
				  v|version
232
				  /);
233

    
234
#
# GetOptions returned false, i.e. the command line could not be parsed.
# Show the usage text and exit with a non-zero status so that whoever
# started us (shell, init script) can tell the invocation was rejected;
# a plain "exit" would report success.
#
if (!$getopt_result) {
    usage();
    exit 1;
}
238

    
239
#
240
# these two things can be taken care of without
241
# initializing things further
242
#
243
if ($opt{"v"}) {
244
    print "$RCSID\n$RELEASE\n";
245
    exit;
246
}
247

    
248
if ($opt{"h"}) {
249
    usage();
250
    exit;
251
}
252

    
253
if ($opt{"d"})
254
{
255
    eval 'require Data::Dumper;';
256

    
257
    if ($@ ne "")
258
    {
259
    	die "error: $@\n";
260
    }
261
}
262

    
263
if ($^O eq "linux" || $^O =~ /^(open|free|net)bsd$/ || $^O eq "aix")
264
{
265
    Sys::Syslog::setlogsock ('unix');
266
}
267

    
268
elsif ($^O eq "solaris")
269
{
270
    Sys::Syslog::setlogsock ('stream');
271
}
272

    
273
openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});
274

    
275
#
276
# definitions
277
#
278
die "basedir $opt{b} does not exist\n" if ($opt{"b"} && ! -d $opt{"b"});
279

    
280
init_globals();
281
init_cf_globals();
282

    
283
syslog_die ("config file $CF{CF} does not exist") if (! -f $CF{"CF"});
284

    
285
#
286
# read config file
287
#
288
if ((my $err = read_cf ($CF{"CF"}, 1)) ne "") {
289
    syslog_die ("$err");
290
}
291

    
292
closelog;
293

    
294
openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});
295

    
296
#
297
# cmdline args override config file
298
#
299
#
# Options that map one-to-one onto a configuration directive.  A false
# option value (not given, empty string, or 0) leaves the directive as
# it was.
#
foreach my $override (
    [ "a" => "ALERTDIR"  ],
    [ "b" => "BASEDIR"   ],
    [ "A" => "AUTHFILE"  ],
    [ "L" => "LOGDIR"    ],
    [ "D" => "STATEDIR"  ],
    [ "s" => "SCRIPTDIR" ],
    [ "k" => "MAX_KEEP"  ],
    [ "m" => "MAXPROCS"  ],
    [ "p" => "SERVPORT"  ],
    [ "t" => "TRAPPORT"  ],
) {
    my ($flag, $directive) = @{$override};
    next if (!$opt{$flag});
    $CF{$directive} = $opt{$flag};
}

#
# The pid file is the exception: an explicitly empty value is honored,
# so only "option not given at all" keeps the configured setting.
#
if (defined ($opt{"P"})) {
    $CF{"PIDFILE"} = $opt{"P"};
}

if ($opt{"i"}) {
    $SLEEPINT = $opt{"i"};
}

#
# -r takes a time value; dhmstos yields undef when it cannot convert it.
#
if ($opt{"r"}) {
    if (!defined (dhmstos ($opt{"r"}))) {
        syslog_die ("bad randstart value");
    }
    $CF{"RANDSTART"} = dhmstos ($opt{"r"});
}

#
# -S: come up with the scheduler stopped, remembering when that happened
#
if ($opt{"S"}) {
    ($STOPPED, $STOPPED_TIME) = (1, time);
}
323

    
324

    
325
#
326
# do some path cleanups and
327
# build lookup tables for alerts and monitors
328
#
329
normalize_paths();
330
gen_scriptdir_hash();
331

    
332
if ($opt{"d"}) {
333
    debug_dir();
334
}
335

    
336
#
337
# load the auth control, bind, and listen
338
#
339
load_auth (1);
340
load_view_users(1);
341

    
342
#
343
# init client interface
344
#   %clients is an I/O structure, indexed by the fd of the client
345
#   $numclients is the number of clients currently connected
346
#   $iovec is fd_set for clients and traps
347
#
348
%clients = ();
349
$numclients = 0;
350
$iovec = '';
351
setup_server();
352

    
353
#
354
# fork and become a daemon
355
#
356
init_dtlog() if ($CF{"DTLOGGING"});
357
daemon() if ($opt{"f"});
358
#
# Record our PID in the pid file, unless the pid file setting is empty.
#
# The file is opened with the three-argument form of open and a lexical
# handle, so the configured path is always taken literally as a file
# name.  $pid_file_owner is only set once the file could be opened; the
# END block relies on that to decide whether to remove the file.
# Failures are reported through syslog instead of being dropped
# silently, but they do not stop the server.
#
if ($CF{"PIDFILE"} ne '') {
    if (open (my $pid_fh, '>', $CF{"PIDFILE"})) {
        $pid_file_owner = $$;
        print $pid_fh "$pid_file_owner\n";
        if (!close ($pid_fh)) {
            syslog ('err', "could not finish writing pid file %s: %s",
                $CF{"PIDFILE"}, "$!");
        }
    }
    else {
        syslog ('err', "could not create pid file %s: %s",
            $CF{"PIDFILE"}, "$!");
    }
}
363
set_last_test ();
364

    
365
#
366
# randomize startup checks if asked to
367
#
368
randomize_startdelay() if ($CF{"RANDSTART"});
369

    
370
@last_alerts = ();
371
@last_failures = ();
372
readhistoricfile ();
373

    
374
$procs = 0;
375
$i=0;
376
$lasttm=time;
377
$fdset_rbits = $fdset_ebits = '';
378
%watch_disabled = ();
379

    
380
$SIG{HUP} = \&reset_server;
381
$SIG{INT} = \&handle_sigterm;		# for interactive debugging
382
$SIG{TERM} = \&handle_sigterm;
383
$SIG{PIPE} = 'IGNORE';
384

    
385
#
386
# load previously saved state
387
#
388
#
# -l given with a value (all, disabled, opstatus, etc...): hand that
# value to load_state.  -l given without a usable value: fall back to the
# old behavior of loading only the disabled hosts/services/groups.
#
load_state ($opt{"l"} ? $opt{"l"} : "disabled") if (exists $opt{"l"});
398

    
399

    
400

    
401
syslog ('info', "mon server started");
402

    
403
#
404
# startup alerts
405
#
406
do_startup_alerts();
407

    
408
#
409
# main monitoring loop
410
#
411
for (;;) {
    debug (1, "$i" . ($STOPPED ? " (stopped)" : "") . "\n");
    $i++;
    $tm = time;

    #
    # step through the watch groups, decrementing and handling expired
    # timers.  Nothing is stepped while the scheduler is stopped, nor
    # while the global exclude_period matches the current time.
    #
    if ($STOPPED) {
        # scheduler is stopped: leave every timer untouched
    }

    elsif (defined $CF{"EXCLUDE_PERIOD"}
        && $CF{"EXCLUDE_PERIOD"} ne ""
        && inPeriod (time, $CF{"EXCLUDE_PERIOD"}))
    {
        debug (1, "not running monitors because of global exclude_period\n");
    }

    else {
        foreach my $group (keys %watch) {
            SERVICE:
            foreach my $service (keys %{$watch{$group}}) {

                my $sref = \%{$watch{$group}->{$service}};

                #
                # seconds since the previous pass; timers always move
                # by at least one
                #
                my $elapsed = $tm - $lasttm;
                if ($elapsed <= 0) {
                    $elapsed = 1;
                }

                #
                # trap timer
                #
                if ($sref->{"traptimeout"}) {
                    $sref->{"_trap_timer"} -= $elapsed;

                    if ($sref->{"_trap_timer"} <= 0
                        && $tm - $sref->{"_last_trap"} > $sref->{"traptimeout"})
                    {
                        $sref->{"_trap_timer"} = $sref->{"traptimeout"};
                        handle_trap_timeout ($group, $service);
                    }
                }

                #
                # trap duration timer
                #
                if (defined ($sref->{"_trap_duration_timer"})) {
                    $sref->{"_trap_duration_timer"} -= $elapsed;

                    if ($sref->{"_trap_duration_timer"} <= 0) {
                        set_op_status ($group, $service, $STAT_OK);
                        undef $sref->{"_trap_duration_timer"};
                    }
                }

                #
                # polling monitor timer: a service is due when it has an
                # interval, its timer ran out, and no monitor for it is
                # still running
                #
                my $due = $sref->{"interval"}
                    && $sref->{"_timer"} <= 0
                    && !$running{"$group/$service"};

                if (!$due) {
                    # not due yet: count down, but never below zero
                    $sref->{"_timer"} -= $elapsed;
                    $sref->{"_timer"} = 0 if ($sref->{"_timer"} < 0);
                    next SERVICE;
                }

                #
                # due, but the process limit is reached; the timer is
                # left alone so the service is looked at again next pass
                #
                if ($CF{"MAXPROCS"} && $procs >= $CF{"MAXPROCS"}) {
                    syslog ('info', "throttled at $procs processes");
                    next SERVICE;
                }

                if (defined $sref->{"exclude_period"}
                    && $sref->{"exclude_period"} ne ""
                    && inPeriod (time, $sref->{"exclude_period"}))
                {
                    debug (1, "not running $group,$service because of exclude_period\n");
                    next SERVICE;
                }

                #
                # a dependency gates the monitor itself either through
                # "depend" with dep_behavior "m" or through "monitordepend"
                #
                my $has_monitor_dep =
                    ($sref->{"dep_behavior"} eq "m"
                        && defined $sref->{"depend"}
                        && $sref->{"depend"} ne "")
                    || (defined $sref->{"monitordepend"}
                        && $sref->{"monitordepend"} ne "");

                if ($has_monitor_dep && !dep_ok ($sref, 'm')) {
                    debug (1, "not running $group,$service because of depend\n");
                    next SERVICE;
                }

                run_monitor ($group, $service);
            }
        }
    }

    $lasttm = time;

    #
    # collect any output from subprocs
    #
    collect_output ();

    #
    # clean up after exited processes, and trigger alerts
    #
    proc_cleanup ();

    #
    # handle client, server, and trap I/O
    # this routine sleeps for $SLEEPINT if no I/O is ready
    #
    handle_io ();
}
533

    
534
die "not reached";
535

    
536
END {
    #
    # Remove the pid file on exit, but only in the process whose PID was
    # recorded as its owner; any other process that reaches this block
    # leaves the file alone.  An empty pid file setting means there is
    # nothing to remove.
    #
    if ($CF{"PIDFILE"} ne '' && $$ == $pid_file_owner) {
        unlink ($CF{"PIDFILE"});
    }
}
539

    
540

    
541
##############################################################################
542

    
543
#
544
# startup alerts
545
#
546
sub do_startup_alerts {
    #
    # Ask do_alert to handle a startup alert for every service of every
    # watch group, with empty output and an exit value of 0.
    # Takes no arguments.
    #
    for my $watchgroup (keys %watch) {
        for my $svc (keys %{$watch{$watchgroup}}) {
            do_alert ($watchgroup, $svc, "", 0, $FL_STARTUPALERT);
        }
    }
}
553

    
554

    
555
#
556
# handle alert event, throttling the alert call if necessary
557
#
558
sub do_alert {
    #
    # Decide whether an alert event for a service should result in alert
    # programs being called, and call them through call_alert.
    #
    # Arguments:
    #   $group    watch group name
    #   $service  service name within that group
    #   $output   output text for the event; its first line is used as
    #             the summary
    #   $retval   exit value, matched against "alertexitrange" and
    #             per-alert "exit=" ranges
    #   $flags    bitmask of $FL_* values describing the kind of event
    #
    # The return value is not meaningful.
    #
    # Order of processing: nothing happens while the scheduler is
    # stopped; the "redistribute" alert (if configured) is called before
    # any suppression check; then disabled watches, failed dependencies
    # and acknowledged failures suppress the alert; finally each period
    # that matches the current time applies its own throttling
    # (alertexitrange, numalerts, alertevery, alertafter) before its
    # alerts are called.
    #
    my ($group, $service, $output, $retval, $flags) = @_;
    my ($sref, @alerts);

    debug (1, "do_alert flags=$flags\n");

    $sref = \%{$watch{$group}->{$service}};

    my $tmnow = time;

    if ($STOPPED) {
        syslog ("notice", "ignoring alert for $group,$service because the mon scheduler is stopped");
        return;
    }

    #
    # if redistribute is set, call it now
    #
    if ($sref->{"redistribute"} ne '')
    {
        my ($fac, $args);
        ($fac, $args) = split (/\s+/, $sref->{"redistribute"}, 2);
        call_alert (
                    group       => $group,
                    service     => $service,
                    output      => $output,
                    retval      => $retval,
                    flags       => $flags | $FL_REDISTRIBUTE,

                    alert       => $fac,
                    args        => $args,
                   );
    }

    #
    # if the alarm is disabled, ignore it
    #
    if ((exists $watch_disabled{$group} && $watch_disabled{$group} == 1)
        || (defined $sref->{"disable"} && $sref->{"disable"} == 1))
    {
        syslog ("notice", "ignoring alert for $group,$service");
        return;
    }

    #
    # dependency check (not applied to startup alerts or upalerts)
    #
    if (!($flags & $FL_STARTUPALERT) &&
        !($flags & $FL_UPALERT) &&
        ((defined $sref->{"depend"} && $sref->{"dep_behavior"} eq "a")
         || (defined $sref->{"alertdepend"})))
    {
        if (!$sref->{"_depend_status"})
        {
            debug (1, "alert for $group,$service supressed because of dep fail\n");
            return;
        }
    }

    #
    # summary = first line of the output, for this event and for the
    # previously recorded failure output
    #
    my ($summary) = split("\n", $output);
    $summary = "(NO SUMMARY)" if (!defined $summary || $summary =~ /^\s*$/m);

    #
    # Declare first, assign conditionally afterwards: a "my" combined
    # with a statement modifier has undefined behaviour in Perl and can
    # leave a value from an earlier call in the variable.
    #
    my $prevsumm;
    if (defined $sref->{"_failure_output"}) {
        ($prevsumm) = split("\n", $sref->{"_failure_output"});
    }
    $prevsumm = "(NO SUMMARY)" if (!defined $prevsumm || $prevsumm =~ /^\s*$/m);

    # the comparison below ignores all whitespace
    my $strippedsummary = $summary;
    $strippedsummary =~ s/\s//mg;
    my $strippedprevious = $prevsumm;
    $strippedprevious =~ s/\s//mg;

    # If the summary changed, un-acknowledge the service if 'unack_summary' is set
    if ($sref->{'_ack'} != 0
        && $sref->{'unack_summary'} == 1
        && $strippedsummary ne $strippedprevious
        && !($flags & ($FL_UPALERT|$FL_ACKALERT|$FL_DISABLEALERT))) {
        print STDERR "Unacking $group/$service:\nSummary: X".$strippedsummary."X\nPrevious: X".$strippedprevious."X\n";
        $sref->{"_ack"} = 0;
        $sref->{"_ack_comment"} = "";
        $sref->{"_consec_failures"}=1;
        foreach my $period (keys %{$sref->{"periods"}})
        {
            $sref->{"periods"}->{$period}->{"_last_alert"} = 0;
#            $sref->{"periods"}->{$period}->{"_alert_sent"} = 0;
            $sref->{"periods"}->{$period}->{"_1stfailtime"} = 0;
            $sref->{"periods"}->{$period}->{"_failcount"} = 0;
        }
    }

    #
    # no alerts for ack'd failures, except for upalerts, ackalerts and
    # disablealerts (a summary change with unack_summary set has already
    # cleared the ack above)
    #
    if ($sref->{"_ack"} != 0 && !($flags & ($FL_UPALERT|$FL_ACKALERT|$FL_DISABLEALERT)))
    {
        syslog ("debug", "no alert for $group.$service" .
                " because of ack'd failure");
        return;
    }

    #
    # check each time period for pending alerts
    #
    foreach my $periodlabel (keys %{$sref->{"periods"}})
    {
        #
        # only send alerts that are in the proper period
        #
        next if (!inPeriod ($tmnow, $sref->{"periods"}->{$periodlabel}->{"period"}));

        my $pref = \%{$sref->{"periods"}->{$periodlabel}};

        #
        # skip upalerts/ackalerts not paired with down alerts
        # disable by setting "no_comp_alerts" in period section
        #
        if (!$pref->{"no_comp_alerts"} && ($flags & ($FL_UPALERT | $FL_ACKALERT)) && !$pref->{"_alert_sent"})
        {
            syslog ('debug', "$group/$service/$periodlabel: Suppressing upalert since no down alert was sent.") if ($flags & $FL_UPALERT);
            syslog ('debug', "$group/$service/$periodlabel: Suppressing ackalert since no down alert was sent.") if ($flags & $FL_ACKALERT);
            next;
        }

        #
        # skip looping upalerts when "no_comp_alerts" set.
        #
        if ($pref->{"no_comp_alerts"} && ($flags & $FL_UPALERT) && ($pref->{"_no_comp_alerts_upalert_sent"}>0))
        {
            next;
        }

        #
        # do this if we're not handling an upalert, startupalert, ackalert, or disablealert
        #
        if (!($flags & $FL_UPALERT) && !($flags & $FL_STARTUPALERT)  && !($flags & $FL_DISABLEALERT) && !($flags & $FL_ACKALERT))
        {
            #
            # alert only when exit code matches
            #
            if (exists $pref->{"alertexitrange"}) {
                next if (!inRange($retval, $pref->{"alertexitrange"}));
            }

            #
            # alert only numalerts
            #
            if ($pref->{"numalerts"} &&
                $pref->{"_alert_sent"} >= $pref->{"numalerts"})
            {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert since numalerts is met.");
                next;
            }

            #
            # only alert once every "alertevery" seconds, unless
            # output from monitor is different or if strict alertevery
            #
            # strict and _ignore_summary are basically the same though
            # strict short-circuits and overrides other settings and exists
            # for compatibility with pre-1.1 configs
            #
            if ($pref->{"alertevery"} != 0 &&                                       # if alertevery is set and
                ($tmnow - $pref->{"_last_alert"} < $pref->{"alertevery"}) &&        # we're within the time period and one of these:
                (($pref->{"_alertevery_strict"}) ||                                 # [ strict is set or
                 ($pref->{"_observe_detail"} && $sref->{"_failure_output"} eq $output) ||   # observing detail and output hasn't changed or
                 (!$pref->{"_observe_detail"} && (!$pref->{"_ignore_summary"}) && ($prevsumm eq $summary)) ||
                                                                                    # not observing detail and not ignoring
                                                                                    # summary and summ hasn't changed or
                 ($pref->{"_ignore_summary"})))                                     # we're ignoring summary changes ]
            {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertevery.");
                next;
            }

            #
            # alertafter NUM
            #
            if (defined $pref->{"alertafter_consec"} && ($sref->{"_consec_failures"} < $pref->{"alertafter_consec"}))
            {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter consecutive failures.");
                next;
            }

            #
            # alertafter timeval
            #
            elsif ( (!defined ($pref->{"alertafter"})) && (defined ($pref->{"alertafterival"})) )
            {
                $pref->{'_1stfailtime'} = $tmnow if $pref->{'_1stfailtime'} == 0;
                if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'})
                {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter numval.");
                    next;
                }
            }

            #
            # alertafter NUM timeval
            #
            elsif (defined ($pref->{"alertafter"}))
            {
                $pref->{"_failcount"}++;

                if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'} &&
                    $pref->{"_failcount"} < $pref->{"alertafter"})
                {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter num timeval.");
                    next;
                }

                #
                # start a new time interval
                #
                if ($tmnow - $pref->{'_1stfailtime'} > $pref->{'alertafterival'})
                {
                    $pref->{"_failcount"} = 1;
                }

                if ($pref->{"_failcount"} == 1)
                {
                    $pref->{"_1stfailtime"} = $tmnow;
                }

                if ($pref->{"_failcount"} < $pref->{"alertafter"})
                {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter num timeval.");
                    next;
                }
            }
        }

        #
        # at this point, no alerts are blocked,
        # so send the alerts
        #

        #
        # pick the alert list that belongs to this kind of event;
        # multiple alerts may be triggered in this period
        #
        if ($flags & $FL_UPALERT)
        {
            @alerts = @{$pref->{"upalerts"}};
        }
        elsif ($flags & $FL_STARTUPALERT)
        {
            @alerts = @{$pref->{"startupalerts"}};
        }
        elsif ($flags & $FL_DISABLEALERT)
        {
            @alerts = @{$pref->{"disablealerts"}};
        }
        elsif ($flags & $FL_ACKALERT)
        {
            @alerts = @{$pref->{"ackalerts"}};
        }
        else
        {
            @alerts = @{$pref->{"alerts"}};
        }

        my $called = 0;

        foreach my $alert_spec (@alerts)
        {
            my ($range, $fac, $args);

            #
            # an alert may start with "exit=N" or "exit=N-M"; such an
            # alert is only called when $retval is within that range
            #
            if ($alert_spec =~ /^exit\s*=\s*((\d+|\d+-\d+))\s/i)
            {
                $range=$1;
                next if (!inRange($retval, $range));
                ($fac, $args) = (split (/\s+/, $alert_spec, 3))[1,2];
            }
            else
            {
                ($fac, $args) = split (/\s+/, $alert_spec, 2);
            }

            $called++ if (call_alert (
                    group       => $group,
                    service     => $service,
                    output      => $output,
                    retval      => $retval,
                    flags       => $flags,

                    pref        => $pref,
                    alert       => $fac,
                    args        => $args,
                )
            );
        }

        #
        # reset _alert_sent if up alert was sent from a trap
        #
        # NOTE(review): ($FL_TRAP | $flags) is a bitwise OR and is true
        # whenever either operand is non-zero, so as written this branch
        # is taken for every upalert, not only for those coming from a
        # trap.  The comment above suggests "&" was intended; left
        # unchanged because other code may rely on the current
        # behaviour -- confirm before changing.
        #
        if ($called)
        {
            if( (($FL_TRAP | $flags) && ($FL_UPALERT & $flags)) ) {
                $pref->{"_alert_sent"} = 0;
                $pref->{"_last_alert"} = 0;
            }
            else {
                $pref->{"_alert_sent"}++;

                #
                # reset _no_comp_alerts_upalert_sent counter - when service will be
                # back up, upalert will be sent.
                #
                if ($pref->{"no_comp_alerts"}) {
                    $pref->{"_no_comp_alerts_upalert_sent"} = 0;
                }
            }

            if ($pref->{"no_comp_alerts"} && ($flags & $FL_UPALERT)) {
                $pref->{"_no_comp_alerts_upalert_sent"}++;
            }
        }
    }
}
875

    
876

    
877

    
878
#
879
# walk through the watch list and reset the time
880
# the service was last called
881
#
882
sub set_last_test {
    #
    # Walk every service of every watch group and set its "_timer" to the
    # service's configured "interval".  Takes no arguments; the return
    # value is not meaningful.
    #
    foreach my $group (keys %watch)
    {
        foreach my $service (keys %{$watch{$group}})
        {
            $watch{$group}->{$service}->{"_timer"} =
                $watch{$group}->{$service}->{"interval"};
        }
    }
}
894

    
895

    
896
#
897
# parse configuration file
898
#
899
# build the following data structures:
900
#
901
# %group
902
#       each element of %group is an array of hostnames
903
#       group records are terminated by a blank line in the
904
#       configuration file
905
# %watch{"group"}->{"service"}->{"variable"} = value
906
# %alias
907
#
908
sub read_cf {
909
    my ($CF, $commit) = @_;
910
    my ($var, $watchgroup, $ingroup, $curgroup, $inwatch,
911
	$args, $hosts, %disabled, $h, $i,
912
	$inalias, $curalias, $inview, $curview);
913
    my ($sref, $pref);
914
    my ($service, $period);
915
    my ($authtype, @authtypes);
916
    my $line_num = 0;
917

    
918
    #
919
    # parse configuration file
920
    #
921
    if (exists($opt{"M"}) || $CF =~ /\.m4$/)
922
    {
923
        my $m4 = "m4";
924
	$m4 = $opt{"M"} if (defined($opt{"M"}));
925
	return "could not open m4 pipe of cf file: $CF: $!"
926
	    if (!open (CFG, "$m4 $CF |"));
927
    }
928

    
929
    else
930
    {
931
	return "could not open cf file: $CF: $!"
932
	    if (!open (CFG, $CF));
933
    }
934

    
935
    #
936
    # buffers to hold the new un-committed config
937
    #
938
    my %new_alias = ();
939
    my %new_views = ();
940
    my %new_CF = %CF;
941
    my %new_groups;
942
    my %new_watch;
943

    
944
    my %is_watch;
945

    
946
    my $servnum = 0;
947

    
948
    my $DEP_BEHAVIOR = "a";
949
    my $DEP_MEMORY = 0;
950
    my $UNACK_SUMMARY = 0;
951

    
952
    my $incomplete_line = 0;
953
    my $linepart = "";
954
    my $l = "";
955
    my $acc_line = "";
956

    
957
    for (;;)
958
    {
959
	#
960
	# read in a logical "line", which may span actual lines
961
	#
962
	do
963
	{
964
	    $line_num++;
965
	    last if (!defined ($linepart = <CFG>));
966
	    next if $linepart =~ /^\s*#/;
967

    
968
	    #
969
	    # accumulate multi-line lines (ones which are \-escaped)
970
	    #
971
	    if ($incomplete_line) { $linepart =~ s/^\s*//; }
972

    
973
	    if ($linepart =~ /^(.*)\\\s*$/)
974
	    {
975
		$incomplete_line = 1;
976
		$acc_line .= $1;
977
		chomp $acc_line;
978
		next;
979
	    }
980

    
981
	    else
982
	    {
983
		$acc_line .= $linepart;
984
	    }
985

    
986
	    $l = $acc_line;
987
	    $acc_line = "";
988

    
989
	    chomp $l;
990
	    $l =~ s/^\s*//;
991
	    $l =~ s/\s*$//;
992

    
993
	    $incomplete_line = 0;
994
	    $linepart = "";
995
	};
996

    
997
	#
998
	# global variables which can be overriden by the command line
999
	#
1000
	if (!$inwatch && $l =~ /^(\w+) \s* = \s* (.*) \s*$/ix)
1001
	{
1002
	    if ($1 eq "alertdir") {
1003
		$new_CF{"ALERTDIR"} = $2;
1004

    
1005
	    } elsif ($1 eq "basedir") {
1006
		$new_CF{"BASEDIR"} = $2;
1007
		$new_CF{"BASEDIR"} = "$PWD/$new_CF{BASEDIR}" if ($new_CF{"BASEDIR"} !~ m{^/});
1008
		$new_CF{"BASEDIR"} =~ s{/$}{};
1009

    
1010
	    } elsif ($1 eq "cfbasedir") {
1011
		$new_CF{"CFBASEDIR"} = $2;
1012
		$new_CF{"CFBASEDIR"} = "$PWD/$new_CF{CFBASEDIR}" if ($new_CF{"CFBASEDIR"} !~ m{^/});
1013
		$new_CF{"CFBASEDIR"} =~ s{/$}{};
1014

    
1015
	    } elsif ($1 eq "mondir") {
1016
		$new_CF{"SCRIPTDIR"} = $2;
1017

    
1018
	    } elsif ($1 eq "logdir") {
1019
		$new_CF{"LOGDIR"} = $2;
1020

    
1021
	    } elsif ($1 eq "histlength") {
1022
		$new_CF{"MAX_KEEP"} = $2;
1023

    
1024
	    } elsif ($1 eq "serverport") {
1025
		$new_CF{"SERVPORT"} = $2;
1026

    
1027
	    } elsif ($1 eq "trapport") {
1028
		$new_CF{"TRAPPORT"} = $2;
1029

    
1030
	    } elsif ($1 eq "serverbind") {
1031
	    	$new_CF{"SERVERBIND"} = $2;
1032

    
1033
	    } elsif ($1 eq "clientallow") {
1034
		$new_CF{"CLIENTALLOW"}= $2;
1035

    
1036
	    } elsif ($1 eq "trapbind") {
1037
	    	$new_CF{"TRAPBIND"} = $2;
1038

    
1039
	    } elsif ($1 eq "pidfile") {
1040
		$new_CF{"PIDFILE"} = $2;
1041

    
1042
	    } elsif ($1 eq "randstart") {
1043
		$new_CF{"RANDSTART"} = dhmstos($2);
1044
		if (!defined ($new_CF{"RANDSTART"})) {
1045
		    close (CFG);
1046
		    return "cf error: bad value '$2' for randstart option (syntax: randstart = timeval), line $line_num";
1047
		}
1048

    
1049
	    } elsif ($1 eq "maxprocs") {
1050
		$new_CF{"MAXPROCS"} = $2;
1051

    
1052
	    } elsif ($1 eq "statedir") {
1053
		$new_CF{"STATEDIR"} = $2;
1054

    
1055
	    } elsif ($1 eq "authfile") {
1056
		$new_CF{"AUTHFILE"} = $2;
1057
                if (! -r $new_CF{"AUTHFILE"}) {
1058
                    close (CFG);
1059
                    return "cf error: authfile '$2' does not exist or is not readable, line $line_num";
1060
                }
1061

    
1062
	    } elsif ($1 eq "authtype") {
1063
		$new_CF{"AUTHTYPE"} = $2;
1064
		@authtypes = split(' ' , $new_CF{"AUTHTYPE"}) ;
1065
		foreach $authtype (@authtypes) {
1066
		    if ($authtype eq "pam") {
1067
			eval 'use Authen::PAM qw(:constants);' ;
1068
			if ($@ ne "") {
1069
			    close (CFG);
1070
			    return "cf error: could not use PAM authentication: $@";
1071
			}
1072
		    }
1073
		}
1074

    
1075
	    } elsif ($1 eq "pamservice") {
1076
		$new_CF{"PAMSERVICE"} = $2;
1077

    
1078
	    } elsif ($1 eq "userfile") {
1079
		$new_CF{"USERFILE"} = $2;
1080
                if (! -r $new_CF{"USERFILE"}) {
1081
                    close (CFG);
1082
                    return "cf error: userfile '$2' does not exist or is not readable, line $line_num";
1083
                }
1084

    
1085
	    } elsif ($1 eq "historicfile") {
1086
	    	$new_CF{"HISTORICFILE"} = $2;
1087

    
1088
	    } elsif ($1 eq "historictime") {
1089
	    	$new_CF{"HISTORICTIME"} = dhmstos($2);
1090
		if (!defined $new_CF{"HISTORICTIME"}) {
1091
		    close (CFG);
1092
		    return "cf error: bad value '$2' for historictime command (syntax: historictime = timeval), line $line_num";
1093
		}
1094

    
1095
	    } elsif ($1 eq "cltimeout") {
1096
		$new_CF{"CLIENT_TIMEOUT"} = dhmstos($2);
1097
		if (!defined ($new_CF{"CLIENT_TIMEOUT"})) {
1098
		    close (CFG);
1099
		    return "cf error: bad value '$2' for cltimeout command (syntax: cltimeout = secs), line $line_num";
1100
		}
1101

    
1102
	    } elsif ($1 eq "monerrfile") {
1103
	    	$new_CF{"MONERRFILE"} = $2;
1104

    
1105
	    } elsif ($1 eq "dtlogfile") {
1106
		$new_CF{"DTLOGFILE"} = $2;
1107

    
1108
	    } elsif ($1 eq "dtlogging") {
1109
		$new_CF{"DTLOGGING"} = 0;
1110
		if ($2 == 1 || $2 eq "yes" || $2 eq "true") {
1111
		    $new_CF{"DTLOGGING"} = 1;
1112
		}
1113

    
1114
	    } elsif ($1 eq "dep_recur_limit") {
1115
	    	$new_CF{"DEP_RECUR_LIMIT"} = $2;
1116

    
1117
	    } elsif ($1 eq "dep_behavior") {
1118
		if ($2 ne "m" && $2 ne "a" && $2 ne "hm") {
1119
		    close (CFG);
1120
		    return "cf error: unknown dependency behavior '$2', line $line_num";
1121
		}
1122
		$DEP_BEHAVIOR = $2;
1123

    
1124
	    } elsif ($1 eq "dep_memory") {
1125
		my $memory = dhmstos($2);
1126
		if (!defined $memory) {
1127
		    close (CFG);
1128
		    return "cf error: bad value '$2' for dep_memory option (syntax: dep_memory = timeval), line $line_num";
1129
		}
1130
		$DEP_MEMORY = $memory;
1131

    
1132
	    } elsif ($1 eq "unack_summary") {
1133
		if (defined $2) {
1134
		    if ($2 =~ /y(es)?/i) {
1135
			$UNACK_SUMMARY = 1;
1136
		    } elsif ($2 =~ /n(o)?/i) {
1137
			$UNACK_SUMMARY = 0;
1138
		    } elsif ($2 eq "0" || $2 eq "1") {
1139
			$UNACK_SUMMARY = $2;
1140
		    } else {
1141
			return "cf error: invalid unack_summary value '$2' (syntax: unack_summary [0|1|y|yes|n|no])";
1142
		    }
1143
		} else {
1144
		    $UNACK_SUMMARY = 1;
1145
		}
1146

    
1147
	    } elsif ($1 eq "syslog_facility") {
1148
	    	$new_CF{"SYSLOG_FACILITY"} = $2;
1149

    
1150
	    } elsif ($1 eq "startupalerts_on_reset") {
1151
		if ($2 =~ /^1|yes|true|on$/i) {
1152
		    $new_CF{"STARTUPALERTS_ON_RESET"} = 1;
1153
		} else {
1154
		    $new_CF{"STARTUPALERTS_ON_RESET"} = 0;
1155
		}
1156

    
1157
	    } elsif ($1 eq "monremote") {
1158
		$new_CF{"MONREMOTE"} = $2;
1159
		
1160
	    } elsif ($1 eq "exclude_period") {
1161
		if (inPeriod (time, $2) == -1)
1162
		  {
1163
		      close (CFG);
1164
		      return "cf error: malformed exclude_period '$2' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
1165
		  }
1166
		$new_CF{"EXCLUDE_PERIOD"} = $2;
1167
	    } else {
1168
		close (CFG);
1169
		return "cf error: unknown variable '$1', line $line_num";
1170
	    }
1171

    
1172
	    next;
1173
	}
1174

    
1175
	#
1176
	# end of record
1177
	#
1178
	if ($l eq "")
1179
	{
1180
	    $ingroup    = 0;
1181
	    $inalias	= 0;
1182
	    $inwatch    = 0;
1183
	    $period	= 0;
1184
	    $inview     = 0;
1185

    
1186
	    $curgroup   = "";
1187
	    $curalias	= "";
1188
	    $watchgroup = "";
1189

    
1190
	    $servnum	= 0;
1191
	    next;
1192
	}
1193

    
1194
	#
1195
	# hostgroup record
1196
	#
1197
	if ($l =~ /^hostgroup\s+([a-zA-Z0-9_.-]+)\s*(.*)/)
1198
	{
1199
	    $curgroup = $1;
1200

    
1201
	    $ingroup = 1;
1202
	    $inview = 0;
1203
	    $inalias = 0;
1204
	    $inwatch = 0;
1205
	    $period  = 0;
1206

    
1207

    
1208
	    $hosts = $2;
1209
	    %disabled = ();
1210

    
1211
	    foreach $h (grep (/^\*/, @{$groups{$curgroup}}))
1212
	    {
1213
		# We have to make $i = $h because $h is actually
1214
		# a pointer to %groups and will modify it.
1215
		$i = $h;
1216
		$i =~ s/^\*//;
1217
		$disabled{$i} = 1;
1218
	    }
1219

    
1220
	    @{$new_groups{$curgroup}} = split(/\s+/, $hosts);
1221

    
1222
	    #
1223
	    # keep hosts which were previously disabled
1224
	    #
1225
	    for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
1226
	    {
1227
		$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
1228
		    if ($disabled{$new_groups{$curgroup}[$i]});
1229
	    }
1230

    
1231
	    next;
1232
	}
1233

    
1234
	if ($ingroup)
1235
	{
1236
	    push (@{$new_groups{$curgroup}}, split(/\s+/, $l));
1237

    
1238
	    for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
1239
	    {
1240
		$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
1241
		    if ($disabled{$new_groups{$curgroup}[$i]});
1242
	    }
1243

    
1244
	    next;
1245
	}
1246

    
1247
	#
1248
	# alias record
1249
	#
1250
	if ($l =~ /^alias\s+([a-zA-Z0-9_.-]+)\s*$/)
1251
	{
1252
	    $inalias = 1;
1253
	    $inview = 0;
1254
	    $ingroup = 0;
1255
	    $inwatch = 0;
1256
	    $period  = 0;
1257

    
1258
	    $curalias = $1;
1259
	    next;
1260
	}
1261

    
1262
	if ($inalias)
1263
	{
1264
	    if ($l =~ /\A(.*)\Z/)
1265
	    {
1266
		push (@{$new_alias{$curalias}}, $1);
1267
		next;
1268
	    }
1269
	}
1270

    
1271
	#
1272
	# view record
1273
	#
1274
	if ($l =~ /^view\s+([a-zA-Z0-9_.-]+)\s+(.*)$/)
1275
	{
1276
	    $inview = 1;
1277
	    $inalias = 0;
1278
	    $ingroup = 0;
1279
	    $inwatch = 0;
1280
	    $period  = 0;
1281

    
1282
	    $curview = $1;
1283
            $new_views{$curview}={};
1284

    
1285
	    foreach (split(/\s+/, $2)) {
1286
		$new_views{$curview}->{$_} = 1;
1287
	    };
1288
	    next;
1289
	}
1290
	
1291
	if ($inview)
1292
	{
1293
	    foreach (split(/\s+/, $l)) {
1294
		$new_views{$curview}->{$_} = 1;
1295
	    };
1296
	    next;
1297
	}
1298

    
1299
	#
1300
	# watch record
1301
	#
1302
	if ($l =~ /^watch\s+([a-zA-Z0-9_.-]+)\s*/)
1303
	{
1304
	    $watchgroup = $1;
1305
	    $inwatch = 1;
1306
	    $inview = 0;
1307
	    $inalias = 0;
1308
	    $ingroup = 0;
1309
	    $period  = 0;
1310

    
1311
	    if (!defined ($new_groups{$watchgroup}))
1312
	    {
1313
		#
1314
		# This hostgroup doesn't exist yet, we'll create it and warn
1315
		#
1316
	    	@{$new_groups{$watchgroup}} = ($watchgroup);
1317
		print STDERR "Warning: watch group $watchgroup defined with no corresponding hostgroup.\n";
1318
	    }
1319
	    if ($new_watch{$watchgroup})
1320
	    {
1321
		close (CFG);
1322
		return "cf error: watch '$watchgroup' already defined, line $line_num";
1323
	    }
1324

    
1325
	    $curgroup   = "";
1326
	    $service = "";
1327

    
1328
	    next;
1329
	}
1330

    
1331
	if ($inwatch)
1332
	{
1333
	    #
1334
	    # env variables
1335
	    #
1336
	    if ($l =~ /^([A-Z_][A-Z0-9_]*)=(.*)/)
1337
	    {
1338
		if ($service eq "") {
1339
		    close (CFG);
1340
		    return "cf error: environment variable defined without a service, line $line_num";
1341
		}
1342
		$new_watch{$watchgroup}->{$service}->{"ENV"}->{$1} = $2;
1343

    
1344
		next;
1345
	    }
1346

    
1347
	    #
1348
	    # non-env variables
1349
	    #
1350
	    else
1351
	    {
1352
		$l =~ /^(\w+)\s*(.*)$/;
1353
		$var = $1;
1354
		$args = $2;
1355
	    }
1356

    
1357
	    #
1358
	    # service entry
1359
	    #
1360
	    if ($var eq "service")
1361
	    {
1362
		$service = $args;
1363

    
1364
		if ($service !~ /^[a-zA-Z0-9_.-]+$/) {
1365
		    close (CFG);
1366
		    return "cf error: invalid service tag '$args', line $line_num";
1367
		}
1368

    
1369
		elsif (exists $new_watch{$watchgroup}->{$service})
1370
		{
1371
		    close (CFG);
1372
		    return "cf error: service $service already defined for watch group $watchgroup, line $line_num";
1373
		}
1374

    
1375
		$period = 0;
1376
		$sref = \%{$new_watch{$watchgroup}->{$service}};
1377
		$sref->{"service"} = $args;
1378
		$sref->{"interval"} = undef;
1379
		$sref->{"randskew"} = 0;
1380
                $sref->{"redistribute"} = "";
1381
		$sref->{"dep_behavior"} = $DEP_BEHAVIOR;
1382
		$sref->{"dep_memory"} = $DEP_MEMORY;
1383
		$sref->{"exclude_period"} = "";
1384
		$sref->{"exclude_hosts"} = {};
1385
		$sref->{"_op_status"} = $STAT_UNTESTED;
1386
		$sref->{"_last_op_status"} = $STAT_UNTESTED;
1387
		$sref->{"_ack"} = 0;
1388
		$sref->{"_ack_comment"} = '';
1389
		$sref->{"unack_summary"} = $UNACK_SUMMARY;
1390
		$sref->{"_consec_failures"} = 0;
1391
		$sref->{"_failure_count"} = 0 if (!defined($sref->{"_failure_count"}));
1392
		$sref->{"_start_of_monitor"} = time if (!defined($sref->{"_start_of_monitor"}));
1393
		$sref->{"_alert_count"} = 0 if (!defined($sref->{"_alert_count"}));
1394
		$sref->{"_last_failure"} = 0 if (!defined($sref->{"_last_failure"}));
1395
		$sref->{"_last_success"} = 0 if (!defined($sref->{"_last_success"}));
1396
		$sref->{"_last_trap"} = 0 if (!defined($sref->{"_last_trap"}));
1397
		$sref->{"_last_traphost"} = '' if (!defined($sref->{"_last_traphost"}));
1398
		$sref->{"_exitval"} = "undef" if (!defined($sref->{"_exitval"}));
1399
		$sref->{"_last_check"} = undef;
1400
		#
1401
		# -1 for _monitor_duration means no monitor has been run yet
1402
		# so there is no duration data available
1403
		#
1404
		$sref->{"_monitor_duration"} = -1;
1405
		$sref->{"_monitor_running"} = 0;
1406
		$sref->{"_depend_status"} = undef;
1407
		$sref->{"failure_interval"} = undef;
1408
		$sref->{"_old_interval"} = undef;
1409
		next;
1410
	    }
1411

    
1412
	    if ($service eq "")
1413
	    {
1414
		close (CFG);
1415
		return "cf error: need to specify service in watch record, line $line_num";
1416
	    }
1417

    
1418

    
1419
	    #
1420
	    # period definition
1421
	    #
1422
	    # for each service there can be one or more alert periods
1423
	    # this is stored as an array of hashes named
1424
	    #     %{$watch{$watchgroup}->{$service}->{"periods"}}
1425
	    # each index for this hash is a unique tag for the period as
1426
	    # defined by the user or named after the period (such as
1427
	    # "wd {Mon-Fri} hr {7am-11pm}")
1428
	    #
1429
	    # the value of the hash is an array containing the list of alert commands
1430
	    # and arguments, so
1431
	    #
1432
	    # @alerts = @{$watch{$watchgroup}->{$service}->{"periods"}->{"TAG"}}
1433
	    #
1434
	    if ($var eq "period")
1435
	    {
1436
		$period = 1;
1437

    
1438
		my $periodstr;
1439

    
1440
		if ($args =~ /^([a-z_]\w*) \s* : \s* (.*)$/ix)
1441
		{
1442
		    $periodstr = $1;
1443
		    $args = $2;
1444
		}
1445

    
1446
		else
1447
		{
1448
		    $periodstr = $args;
1449
		}
1450

    
1451
		if (exists $sref->{"periods"}->{$periodstr})
1452
		{
1453
		    close (CFG);
1454
		    return "cf error: period '$periodstr' already defined for watch group $watchgroup service $service, line $line_num";
1455
		}
1456

    
1457
		$pref = \%{$sref->{"periods"}->{$periodstr}};
1458

    
1459
		if (inPeriod (time, $args) == -1)
1460
		{
1461
		    close (CFG);
1462
		    return "cf error: malformed period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
1463
		}
1464

    
1465
		$pref->{"period"} = $args;
1466
		$pref->{"alertevery"} = 0;
1467
		$pref->{"numalerts"} = 0;
1468
		$pref->{"_alert_sent"} = 0;
1469
		$pref->{"no_comp_alerts"} = 0;
1470
		$pref->{"_no_comp_alerts_upalert_sent"} = 0;
1471
		@{$pref->{"alerts"}} = ();
1472
		@{$pref->{"upalerts"}} = ();
1473
		@{$pref->{"ackalerts"}} = ();
1474
		@{$pref->{"disablealerts"}} = ();
1475
		@{$pref->{"startupalerts"}} = ();
1476
		next;
1477
	    }
1478

    
1479
	    #
1480
	    # period variables
1481
	    #
1482
	    if ($period)
1483
	    {
1484
		if ($var eq "alert")
1485
		{
1486
		    push @{$pref->{"alerts"}}, $args;
1487
		}
1488
		
1489
		elsif ($var eq "ackalert")
1490
		{
1491
		    push @{$pref->{"ackalerts"}}, $args;
1492
		}
1493
		
1494
		elsif ($var eq "disablealert")
1495
		{
1496
		    push @{$pref->{"disablealerts"}}, $args;
1497
		}
1498
		
1499
		elsif ($var eq "upalert")
1500
		{
1501
		    $sref->{"_upalert"} = 1;
1502
		    push @{$pref->{"upalerts"}}, $args;
1503
		}
1504

    
1505
		elsif ($var eq "startupalert")
1506
		{
1507
		    push @{$pref->{"startupalerts"}}, $args;
1508
		}
1509

    
1510
		elsif ($var eq "alertevery")
1511
		{
1512
		    $pref->{"_observe_detail"} = 0;
1513
		    $pref->{"_alertevery_strict"} = 0;
1514
		    $pref->{"_ignore_summary"} = 0;
1515

    
1516
		    if ($args =~ /(\S+) \s+ observe_detail \s*$/ix)
1517
		    {
1518
			$pref->{"_observe_detail"} = 1;
1519
			$args = $1;
1520
		    }
1521

    
1522
		    elsif ($args =~ /(\S+) \s+ ignore_summary \s*$/ix)
1523
		    {
1524
			$pref->{"_ignore_summary"} = 1;
1525
			$args = $1;
1526
		    }
1527

    
1528
		    #
1529
		    # for backwards compatibility with <= 0.38.21
1530
		    #
1531
		    elsif ($args =~ /(\S+) \s+ summary/ix)
1532
		    {
1533
			$args = $1;
1534
		    }
1535

    
1536
		    #
1537
		    # strict
1538
		    #
1539
		    elsif ($args =~ /(\S+) \s+ strict \s*$/ix)
1540
		    {
1541
			$pref->{"_alertevery_strict"} = 1;
1542
		    	$args = $1;
1543
		    }
1544

    
1545
		    if (!($args = dhmstos ($args))) {
1546
			close (CFG);
1547
			return "cf error: invalid time interval '$args' (syntax: alertevery {positive number}{smhd} [ strict | observe_detail | ignore_summary ]), line $line_num";
1548
		    }
1549

    
1550
		    $pref->{"alertevery"} = $args;
1551
		    next;
1552
		}
1553

    
1554
		elsif ($var eq "alertafter")
1555
		{
1556
		    my ($p1, $p2);
1557

    
1558
		    #
1559
		    # alertafter NUM
1560
		    #
1561
		    if ($args =~ /^(\d+)$/)
1562
		    {
1563
			$p1 = $1;
1564
			$pref->{"alertafter_consec"} = $p1;
1565
		    }
1566

    
1567
		    #
1568
		    # alertafter timeval
1569
		    #
1570
		    elsif ($args =~ /^(\d+[hms])$/)
1571
		    {
1572
			$p1 = $1;
1573
			if (!($p1 = dhmstos ($p1)))
1574
			{
1575
			    close (CFG);
1576
			    return "cf error: invalid time interval '$args' (syntax: alertafter = [{positive integer}] [{positive number}{smhd}]), line $line_num";
1577
			}
1578

    
1579
			$pref->{"alertafterival"} = $p1;
1580
			$pref->{"_1stfailtime"} = 0;
1581
		    }
1582

    
1583
		    #
1584
		    # alertafter NUM timeval
1585
		    #
1586
		    elsif ($args =~ /(\d+)\s+(\d+[hms])$/)
1587
		    {
1588
			($p1, $p2) = ($1, $2);
1589
			if (($p1 - 1) * $sref->{"interval"} >= dhmstos($p2))
1590
			{
1591
			    close (CFG);
1592
			    return "cf error:  interval & alertafter not sensible. No alerts can be generated with those parameters, line $line_num";
1593
			}
1594
			$pref->{"alertafter"} = $p1;
1595
			$pref->{"alertafterival"} = dhmstos ($p2);
1596

    
1597
			$pref->{"_1stfailtime"} = 0;
1598
			$pref->{"_failcount"} = 0;
1599
		    }
1600

    
1601
		    else
1602
		    {
1603
			close (CFG);
1604
			return "cf error: invalid interval specification '$args', line $line_num";
1605
		    }
1606
		}
1607

    
1608
		elsif ($var eq "upalertafter")
1609
		{
1610
		    if (!($args = dhmstos ($args))) {
1611
			close (CFG);
1612
			return "cf error: invalid upalertafter specification '$args' (syntax: upalertafter = {positive number}{smhd}), line $line_num";
1613
		    }
1614

    
1615
		    $pref->{"upalertafter"} = $args;
1616
		}
1617

    
1618
		elsif ($var eq "numalerts")
1619
		{
1620
		    if ($args !~ /^\d+$/) {
1621
			close (CFG);
1622
			return "cf error: -numeric arg '$args' (syntax: numalerts = {positive integer}, line $line_num";
1623
		    }
1624
		    $pref->{"numalerts"} = $args;
1625
		    next;
1626
		}
1627

    
1628
		elsif ($var eq "no_comp_alerts")
1629
		{
1630
		    $pref->{"no_comp_alerts"} = 1;
1631
		    next;
1632
		}
1633

    
1634
		elsif ($var eq "alerts_dont_count")
1635
		{
1636
		    $pref->{"alerts_dont_count"} = 1;
1637
		    next;
1638
		}
1639

    
1640
		elsif ($var eq 'alertexitrange') {
1641
		  if ($args !~ /^\s*(\d+|\d+-\d+)\s*$/) {
1642
		    close (CFG);
1643
		    return "cf error: invalid exit code range '$args', line $line_num";
1644
		  }
1645
		  $pref->{"alertexitrange"} = $args;
1646
		}
1647

    
1648
		else
1649
		{
1650
		    close (CFG);
1651
		    return "cf error: unknown syntax [$l], line $line_num";
1652
		}
1653
		
1654
	    }
1655

    
1656
	    #
1657
	    # non-period variables
1658
	    #
1659
	    elsif (!$period)
1660
	    {
1661
		if ($var eq "interval")
1662
		{
1663
		    if (!($args = dhmstos ($args))) {
1664
			close (CFG);
1665
			return "cf error: invalid time interval '$args' (syntax: interval = {positive number}{smhd}), line $line_num";
1666
		    }
1667
		}
1668

    
1669
		elsif ($var eq "failure_interval")
1670
		{
1671
		    if (!($args = dhmstos ($args))) {
1672
			close (CFG);
1673
			return "cf error: invalid interval '$args' (syntax: failure_interval = {positive number}{smhd}), line $line_num";
1674
		    }
1675
		}
1676

    
1677
		elsif ($var eq "monitor")
1678
		{
1679
		    # valid
1680
		}
1681

    
1682
                elsif ($var eq "redistribute")
1683
                {
1684
                    # valid
1685
                }
1686

    
1687
		elsif ($var eq "allow_empty_group")
1688
		{
1689
		    # valid
1690
		}
1691

    
1692
		elsif ($var eq "description")
1693
		{
1694
		    # valid
1695
		}
1696

    
1697
		elsif ($var eq "unack_summary")
1698
		{
1699
		    if (defined $args) {
1700
			if ($args =~ /y(es)?/i) {
1701
			    $args = 1;
1702
			} elsif ($args =~ /n(o)?/i) {
1703
			    $args = 0;
1704
			}
1705
			if ($args eq "0" || $args eq "1") {
1706
			    $sref->{"unack_summary"} = $args;
1707
			} else {
1708
			    return "cf error: invalid unack_summary value '$args' (syntax: unack_summary [0|1|y|yes|n|no])";
1709
			}
1710
		    } else {
1711
			$sref->{"unack_summary"} = 1;
1712
		    }
1713
		    next;
1714
		}
1715

    
1716
		elsif ($var eq "traptimeout")
1717
		{
1718
		    if (!($args = dhmstos ($args))) {
1719
			close (CFG);
1720
			return "cf error: invalid traptimeout interval '$args' (syntax: traptimeout = {positive number}{smhd}), line $line_num";
1721
		    }
1722
		    $sref->{"_trap_timer"} = $args;
1723
		}
1724

    
1725
		elsif ($var eq "trapduration")
1726
		{
1727
		    if (!($args = dhmstos ($args))) {
1728
			close (CFG);
1729
			return "cf error: invalid trapduration interval '$args' (syntax: trapduration = {positive number}{smhd}), line $line_num";
1730
		    }
1731
		}
1732

    
1733
		elsif ($var eq "randskew")
1734
		{
1735
		    if (!($args = dhmstos ($args))) {
1736
			close (CFG);
1737
			return "cf error: invalid randskew time interval '$args' (syntax: randskew = {positive number}{smhd}), line $line_num";
1738
		    }
1739
		}
1740

    
1741
		elsif ($var eq "dep_behavior")
1742
		{
1743
		    if ($args ne "m" && $args ne "a" && $args ne "hm")
1744
		    {
1745
			close (CFG);
1746
			return "cf error: unknown dependency behavior '$args' (syntax: dep_behavior = {m|a}), line $line_num";
1747
		    }
1748
		}
1749
 
1750
		elsif ($var eq "dep_memory")
1751
		{
1752
		    my $timeval = dhmstos($args);
1753
		    if (!$timeval) {
1754
  		        close (CFG);
1755
			return "cf error: bad value '$args' for dep_memory option (syntax: dep_memory = timeval), line $line_num";
1756
		    }
1757
		    $args = $timeval;
1758
		}
1759

    
1760
		elsif ($var eq "depend")
1761
		{
1762
		    $args =~ s/SELF:/$watchgroup:/g;
1763
		}
1764

    
1765
		elsif ($var eq "alertdepend")
1766
		{
1767
		    $args =~ s/SELF:/$watchgroup:/g;
1768
		}
1769

    
1770
		elsif ($var eq "monitordepend")
1771
		{
1772
		    $args =~ s/SELF:/$watchgroup:/g;
1773
		}
1774

    
1775
		elsif ($var eq "hostdepend")
1776
		{
1777
		    $args =~ s/SELF:/$watchgroup:/g;
1778
		}
1779

    
1780
		elsif ($var eq "exclude_hosts")
1781
		{
1782
		    my $ex = {};
1783
		    foreach my $h (split (/\s+/, $args))
1784
		    {
1785
			$ex->{$h} = 1;
1786
		    }
1787
		    $args = $ex;
1788
		}
1789

    
1790
		elsif ($var eq "exclude_period")
1791
		{
1792
		    if (inPeriod (time, $args) == -1)
1793
		    {
1794
			close (CFG);
1795
			return "cf error: malformed exclude_period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
1796
		    }
1797
		}
1798

    
1799
		else
1800
		{
1801
		    close (CFG);
1802
		    return "cf error: unknown syntax [$l], line $line_num";
1803
		}
1804

    
1805
		$sref->{$var} = $args;
1806
	    }
1807

    
1808
	    else
1809
	    {
1810
		close (CFG);
1811
		return "cf error: unknown syntax outside of period section [$l], line $line_num";
1812
	    }
1813
	}
1814

    
1815
	next;
1816
    }
1817

    
1818
    close (CFG) || return "Could not open pipe to m4 (check that m4 is properly installed and in your PATH): $!";
1819

    
1820
    #
1821
    # Go through each defined hostgroup and check that there is a 
1822
    #  watch associated with that hostgroup record.
1823
    #
1824
    # hostgroups without associated watches are not a violation of 
1825
    #  mon config syntax, but it's usually not what you want.
1826
    #
1827
    for (keys(%new_watch)) { $is_watch{$_} = 1 };
1828
    foreach $watchgroup ( keys (%new_groups) ) {
1829
	print STDERR "Warning: hostgroup $watchgroup has no watch assigned to it!\n" unless $is_watch{$watchgroup};
1830
    }
1831

    
1832
    #
1833
    # no errors, commit new config if $commit was specified
1834
    #
1835
    return "" unless $commit;
1836
    %views = %new_views;
1837
    %alias = %new_alias;
1838
    %groups = %new_groups;
1839
    %watch = %new_watch;
1840
    %CF = %new_CF;
1841

    
1842
    "";
1843
}
1844

    
1845

    
1846
#
1847
# convert a string like "20m" into seconds
1848
#
1849
sub dhmstos {
    #
    # Translate a time value made of a (possibly fractional) number and a
    # one-letter unit -- s, m, h or d, in either case -- into seconds.
    #
    # Returns the number of seconds, or undef when the string does not
    # have that shape (a bare number without a unit is rejected).
    #
    my ($str) = @_;

    #
    # chain of factors applied, in order, to the numeric part for each
    # unit; a value given in seconds is handed back untouched
    #
    my %factors_for = (
        "s" => [],
        "m" => [60],
        "h" => [60, 60],
        "d" => [60, 60, 24],
    );

    my ($amount, $unit) = lc ($str) =~ /^\s*(\d+(?:\.\d+)?)([dhms])\s*$/i;

    if (!defined $unit) {
        return undef;
    }

    my $secs = $amount;
    foreach my $factor (@{$factors_for{$unit}}) {
        $secs = $secs * $factor;
    }

    $secs;
}
1870

    
1871

    
1872
#
1873
# reset the state of the server on SIGHUP, and reread config
1874
# file.
1875
#
1876
sub reset_server {
    #
    # Kill every child listed in %runningpid, re-read the configuration
    # file named by $CF{"CF"} and re-initialise the scheduler state.
    #
    # $keepstate - when true, state is saved before the configuration is
    #              re-read and loaded again afterwards
    #
    # Returns 1 on success, undef if the configuration could not be read.
    #
    my ($keepstate) = @_;

    #
    # send TERM to each known child, wait for it and forget about it
    #
    for my $child (keys %runningpid) {
        kill 15, $child;
        waitpid ($child, 0);
        syslog ('info', "reset killed child $child, exit status $?");
        remove_proc ($child);
    }

    $procs = 0;

    if ($keepstate) {
        save_state ("all");
    }

    syslog ('info', "resetting, and re-reading configuration $CF{CF}");

    my $cf_error = read_cf ($CF{"CF"}, 1);
    if ($cf_error ne "") {
        syslog ('err', "error reading config file: $cf_error");
        return undef;
    }

    normalize_paths ();
    gen_scriptdir_hash ();

    # the last time(2) the loop started
    $lasttm = time;
    $fdset_rbits = $fdset_ebits = '';

    set_last_test ();

    if ($CF{"RANDSTART"}) {
        randomize_startdelay ();
    }

    if ($keepstate) {
        load_state ("all");
    }

    if ($CF{"DTLOGGING"}) {
        init_dtlog ();
    }

    readhistoricfile ();

    if ($CF{"STARTUPALERTS_ON_RESET"}) {
        do_startup_alerts ();
    }

    return 1;
}
1918

    
1919

    
1920
sub init_dtlog {
    #
    # Append a header to the downtime log named by $CF{"DTLOGFILE"}.
    #
    # Does nothing unless $CF{"DTLOGGING"} is true.  If the file cannot
    # be opened for appending, the failure is reported through syslog and
    # $CF{"DTLOGGING"} is set to 0.
    #
    return if (!$CF{"DTLOGGING"});

    my $t = time;
    my $dtlog;

    #
    # three-argument open: the configured path is used verbatim, so
    # blanks or characters such as "&" in it cannot alter the open mode
    #
    if (!open ($dtlog, '>>', $CF{"DTLOGFILE"})) {
       syslog ('err', "could not append to $CF{DTLOGFILE}: $!");
       $CF{"DTLOGGING"} = 0;
    } else {
       $CF{"DTLOGGING"} = 1;
       print $dtlog <<EOF;
#
# downtime log start $t
# time back up, group, service, first failure, downtime, interval, summary
#
EOF
       #
       # buffered write errors only show up when the handle is closed
       #
       close ($dtlog) ||
           syslog ('err', "could not write to $CF{DTLOGFILE}: $!");
    }
}
1939

    
1940

    
1941
#
1942
# remove a process from our state
1943
#
1944
sub remove_proc {
    #
    # Forget about child process $pid: clear its descriptor's bit in
    # $fdset_rbits, close its filehandle, drop its entries from
    # %fhandles, %running and %runningpid and decrement $procs.
    #
    # A pid with no entry in %runningpid is ignored.
    #
    my ($pid) = @_;

    # %runningpid maps the pid to the key used by %fhandles and %running
    my $tag = $runningpid{$pid};
    if (!defined $tag) {
        return;
    }

    my $handle = $fhandles{$tag};

    vec ($fdset_rbits, fileno ($handle), 1) = 0;
    close ($handle);

    delete $fhandles{$tag};
    delete $running{$tag};
    delete $runningpid{$pid};

    $procs--;
}
1956

    
1957

    
1958
#
1959
# exit on SIGTERM
1960
#
1961
sub handle_sigterm {
    #
    # Note the TERM signal in syslog, then leave with exit status 1.
    #
    my $notice = "caught TERM signal, exiting";

    syslog ("info", $notice);

    exit 1;
}
1965

    
1966

    
1967
#
1968
# set O_NONBLOCK and FD_CLOEXEC on the given filehandle
1969
#
1970
sub configure_filehandle {
    #
    # Turn on O_NONBLOCK (file status flags) and FD_CLOEXEC (descriptor
    # flags) for filehandle $fh.
    #
    # Returns 1 when all four fcntl calls succeed; returns nothing as
    # soon as one of them fails.
    #
    my ($fh) = @_;

    # fcntl hands back "0 but true" for a zero flag word, so a false
    # result always means the call itself failed
    my $scratch = '';
    my $status_flags = fcntl ($fh, F_GETFL, $scratch);
    if (!$status_flags) {
        return;
    }
    if (!fcntl ($fh, F_SETFL, $status_flags | O_NONBLOCK)) {
        return;
    }

    my $fd_flags = fcntl ($fh, F_GETFD, 0);
    if (!$fd_flags) {
        return;
    }
    if (!fcntl ($fh, F_SETFD, $fd_flags | FD_CLOEXEC)) {
        return;
    }

    return 1;
}
1985

    
1986

    
1987
#
1988
# setup server
1989
#
1990
sub setup_server {
    #
    # Create the two listening sockets of the daemon:
    #
    #   SERVER      TCP socket for client connections (such as moncmd),
    #               bound to $CF{"SERVPORT"} on $CF{"SERVERBIND"}
    #   TRAPSERVER  UDP socket for remote monitor traps, bound to
    #               $CF{"TRAPPORT"} on $CF{"TRAPBIND"}
    #
    # When a bind address is not configured, INADDR_ANY is used.  Both
    # sockets are passed through configure_filehandle.  Every failure is
    # reported through die_die.
    #
    my ($tcpproto, $udpproto);

    if (!defined ($tcpproto = getprotobyname ('tcp')))
    {
    	die_die ("err", "could not get protocol for tcp");
    }

    if (!defined ($udpproto = getprotobyname ('udp')))
    {
    	die_die ("err", "could not get protocol for udp");
    }

    #
    # client server, such as moncmd
    #
    my $bindaddr;
    if (defined $CF{"SERVERBIND"})
    {
	# gethostbyname reports its failure code in $?, not $!
	if (!($bindaddr = gethostbyname ($CF{"SERVERBIND"})))
	{
	    die_die ("err", "error returned by gethostbyname for serverbind: $?");
	}
    }

    else
    {
    	$bindaddr = INADDR_ANY;
    }

    socket (SERVER, PF_INET, SOCK_STREAM, $tcpproto) ||
    	die_die ("err", "could not create TCP socket: $!");

    setsockopt (SERVER, SOL_SOCKET, SO_REUSEADDR, pack ("l", 1)) ||
    	die_die ("err", "could not setsockopt: $!");

    bind (SERVER, sockaddr_in ($CF{"SERVPORT"}, $bindaddr)) ||
    	die_die ("err", "could not bind TCP server port $CF{'SERVPORT'}: $!");

    #
    # a socket that is not listening can never accept a client, so a
    # failure here is as fatal as a failed bind
    #
    listen (SERVER, SOMAXCONN) ||
    	die_die ("err", "could not listen on TCP server port $CF{'SERVPORT'}: $!");

    configure_filehandle (*SERVER) ||
    	die_die ("err", "could not configure TCP server port: $!");

    #
    # remote monitor traps
    #
    if (defined $CF{"TRAPBIND"})
    {
	if (!($bindaddr = gethostbyname ($CF{"TRAPBIND"})))
	{
	    die_die ("err", "error returned by gethostbyname for trapbind: $?");
	}
    }

    else
    {
    	$bindaddr = INADDR_ANY;
    }

    socket (TRAPSERVER, PF_INET, SOCK_DGRAM, $udpproto) ||
    	die_die ("err", "could not create UDP socket: $!");
    bind (TRAPSERVER, sockaddr_in ($CF{"TRAPPORT"}, $bindaddr)) ||
    	die_die ("err", "could not bind UDP server port: $!");
    configure_filehandle (*TRAPSERVER) ||
    	die_die ("err", "could not configure UDP trap port: $!");
}
2057

    
2058

    
2059
#
2060
# set up a client connection if necessary
2061
#
2062
sub client_accept {
    #
    # Accept one pending connection on SERVER.
    #
    # The peer address is checked against the blank-separated patterns
    # in $CF{"CLIENTALLOW"}; a peer matching none of them is logged and
    # disconnected.  An accepted client is made non-blocking and recorded
    # in %clients under its file descriptor number, and $numclients is
    # incremented.
    #
    my ($sock, $port, $addr);

    my $CLIENT = FileHandle->new;

    if (!defined ($sock = accept ($CLIENT, SERVER))) {
    	syslog ('err', "accept returned error: $!");
	return;
    }

    debug(1, "accepted client $CLIENT\n");
    my $fno = fileno ($CLIENT);

    #
    # set socket to nonblocking
    #
    if (!configure_filehandle ($CLIENT)) {
    	syslog ("err", "could not configure for client: $!");
	close ($CLIENT);
	return;
    }

    ($port, $addr) = unpack_sockaddr_in ($sock);
    my $clientip = inet_ntoa($addr);

    syslog ('info', "client connection from $clientip:$port");

    my @clientregex = split(' ', $CF{"CLIENTALLOW"});
    my $ipok= 0;

    foreach my $ippattern (@clientregex)
    {
	#
	# change all periods, except those preceded by [ or \, into \.
	#
	$ippattern=~ s/([^[\\])\./$1\\./g;

	#
	# The pattern comes from the config file and is compiled here; a
	# malformed one raises an exception.  Catch it so that a bad
	# clientallow entry is logged and skipped rather than taking the
	# whole server down when a client connects.
	#
	my $matched = eval {
	    local $SIG{__DIE__} = 'DEFAULT';
	    ($clientip =~ /^${ippattern}$/) ? 1 : 0;
	};

	if (!defined $matched)
	{
	    syslog ('err', "%s", "ignoring invalid clientallow pattern '$ippattern': $@");
	    next;
	}

	if ($matched)
	{
	    $ipok= 1;
	    last;
	}
    }

    if (! $ipok)
    {
	syslog('notice', "closing unwanted client: $clientip");
	close($CLIENT);
	return;
    }

    # unbuffered output on the client socket
    select ($CLIENT);
    $|=1;
    select (STDOUT);

    $clients{$fno}->{"host"} = $clientip;
    $clients{$fno}->{"fhandle"} = $CLIENT;
    $clients{$fno}->{"user"} = undef;		# username if authenticated
    $clients{$fno}->{"timeout"} = $CF{"CLIENT_TIMEOUT"};
    $clients{$fno}->{"last_read"} = time;		# last time data was read
    $clients{$fno}->{"buf"} = '';
    $numclients++;
}
2125

    
2126

    
2127
#
2128
# do all pending client commands
2129
#
2130
sub client_dopending {
    #
    # For every entry in %clients whose buffer holds at least one
    # complete line, strip that line (with its run of CR/LF characters)
    # from the buffer and pass it to client_command.  At most one line
    # per client is handled per call.
    #
    for my $cl (keys %clients) {
	# a successful substitution both detects and removes the line
	next unless ($clients{$cl}->{"buf"} =~ s/^([^\r\n]*)[\r\n]+//s);

	# copy the capture before anything else can run a regex
	my $line = $1;

	client_command ($cl, $line);
    }
}
2142

    
2143

    
2144
#
2145
# close a client connection
2146
#
2147
sub client_close {
    #
    # Drop client $cl: close its filehandle, delete its %clients entry,
    # clear its bit in $iovec and decrement $numclients.
    #
    # $reason, when given, is written to syslog first.  Dies if the
    # client has no filehandle recorded.
    #
    my ($cl, $reason) = @_;

    if (defined $reason) {
	syslog ('info', "closing client $cl: $reason");
    }

    unless (defined ($clients{$cl}->{"fhandle"})) {
	die;
    }

    close ($clients{$cl}->{"fhandle"});
    delete $clients{$cl};

    vec ($iovec, $cl, 1) = 0;
    $numclients--;
}
2157

    
2158

    
2159
#
2160
# Handle a connection from a client
2161
#
2162
sub client_command {
2163
    my ($cl, $l) = @_;
2164
    my ($cmd, $args, $group, $service, $s, $sname, $stchanged);
2165
    my ($var, $value, $msg, @l, $sock, $port, $addr, $sref, $auth, $fh);
2166
    my ($user, $pass, @argsList, $comment);
2167
    my ($authtype, @authtypes);
2168
    my $is_auth = 0;    #flag for multiple auth types
2169

    
2170
    syslog ('info', "client command \"$l\"")
2171
	if ($l !~ /^\s*login/i);
2172

    
2173
    $fh = $clients{$cl}->{"fhandle"};
2174

    
2175
    if ($l !~ /^(dump|login|disable|enable|quit|list|set|get|setview|getview|
2176
		    stop|start|loadstate|savestate|reset|clear|checkauth|
2177
		    reload|term|test|servertime|ack|version|protid)(\s+(.*))?$/ix) {
2178
	sock_write ($fh, "520 invalid command\n");
2179
	return;
2180
    }
2181
    ($cmd, $args) = ("\L$1", $3);
2182

    
2183
    $stchanged = 0;
2184

    
2185
    print STDERR "client command $cmd\nclient args $args\n";
2186
    #
2187
    # quit command
2188
    #
2189
    if ($cmd eq "quit") {
2190
	sock_write ($fh, "220 quitting\n");
2191
	client_close ($cl);
2192

    
2193
    } elsif ($opt{"d"} && $cmd eq "dump") {
2194
    	print STDERR Dumper (\%watch), "\n\n";
2195

    
2196
    #
2197
    # protocol identification
2198
    #
2199
    } elsif ($cmd eq "protid") {
2200
    	if ($args != int ($PROT_VERSION))
2201
	{	
2202
	    sock_write ($fh, "520 protocol mismatch\n");
2203
	}
2204

    
2205
	else
2206
	{
2207
	    sock_write ($fh, "220 protocol match\n");
2208
	}
2209

    
2210
    #
2211
    # login
2212
    #
2213
    } elsif ($cmd eq "login") {
2214
	($user, $pass) = split (/\s+/, $args, 2);
2215
	@authtypes = split(' ' , $CF{"AUTHTYPE"}) ;
2216
	# Check each form of authentication in order, and stop checking
2217
	# as soon as we get a positive authentication result.
2218
	foreach $authtype (@authtypes) {
2219
            if (defined auth ($authtype, $user, $pass, $clients{$cl}->{"host"})) {
2220
		$is_auth = 1;
2221
		last;
2222
	    }
2223
	}
2224
	if ($is_auth != 1) {
2225
	    sock_write ($fh,  "530 login unsuccessful\n");
2226
	} else {
2227
	    $clients{$cl}->{"user"} = $user;
2228
	    syslog ("info", "authenticated $user");
2229
	    sock_write ($fh,  "220 login accepted\n");
2230
	}
2231

    
2232
    #
2233
    # reset
2234
    #
2235
    } elsif ($cmd eq "reset" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2236
	my ($keepstate);
2237
	if ($args =~ /stopped/i) {
2238
	    $STOPPED = 1;
2239
	    $STOPPED_TIME = time;
2240
	}
2241

    
2242
	if ($args =~ /keepstate/) {
2243
	    $keepstate = 1;
2244
	}
2245

    
2246
	if (reset_server ($keepstate)) {
2247
	    sock_write ($fh,  "220 reset PID $$\@$HOSTNAME\n");
2248
	} else {
2249
	    sock_write ($fh,  "520 reset PID $$\@$HOSTNAME failed, error in config file\n");
2250
	}
2251

    
2252
    #
2253
    # reload
2254
    #
2255
    } elsif ($cmd eq "reload" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2256
	if (!defined reload (split (/\s+/, $args))) {
2257
	    sock_write ($fh,  "520 unknown reload command\n");
2258
	} else {
2259
	    sock_write ($fh,  "220 reload completed\n");
2260
	}
2261

    
2262
    #
2263
    # clear
2264
    #
2265
    } elsif ($cmd eq "clear" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2266
    	if ($args =~ /^timers \s+ ([a-zA-Z0-9_.-]+) \s+ ([a-zA-Z0-9_.-]+)/ix) {
2267
	    if (!defined $watch{$1}->{$2}) {
2268
		sock_write ($fh,  "520 unknown group\n");
2269
	    } else {
2270
		clear_timers ($1, $2);
2271
		sock_write ($fh,  "220 clear timers completed\n");
2272
	    }
2273

    
2274
	} else {
2275
	    sock_write ($fh,  "520 unknown clear command\n");
2276
	    next;
2277
	}
2278

    
2279
    #
2280
    # test
2281
    #
2282
    } elsif ($cmd eq "test" && check_auth ($clients{$cl}->{"user"}, $cmd))  {
2283
	my ($cmd, $args) = split (/\s+/, $args, 2);
2284

    
2285
	#
2286
	# test monitor
2287
	#
2288
	if ($cmd eq "monitor") {
2289
	    my ($group, $service) = split (/\s+/, $args);
2290

    
2291
	    if (!defined $watch{$group}->{$service}) {
2292
		sock_write ($fh,  "$group $service not defined\n");
2293
	    } else {
2294
		$watch{$group}->{$service}->{"_timer"} = 0;
2295
                $watch{$group}->{$service}->{"_next_check"} = 0;
2296
		mysystem("$CF{MONREMOTE} test $group $service") if ($CF{MONREMOTE});
2297
	    }
2298
	    sock_write ($fh,  "220 test monitor completed\n");
2299

    
2300
	#
2301
	# test alert
2302
	#
2303
	} elsif ($cmd =~ /^alert|startupalert|upalert|ackalert|disablealert$/) {
2304
	    my ($group, $service, $retval, $period) = split (/\s+/, $args, 4);
2305

    
2306
	    if (!defined $watch{$group}->{$service}) {
2307
		sock_write ($fh,  "520 $group $service not defined\n");
2308

    
2309
	    } elsif (!defined $watch{$group}->{$service}->{"periods"}->{$period}) {
2310
		    sock_write ($fh,  "520 period not defined\n");
2311

    
2312
	    } else {
2313
		my $f = 0;
2314
		my $a;
2315

    
2316
		if ($cmd eq "alert") {
2317
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"alerts"};
2318
		} elsif ($cmd eq "startupalert") {
2319
		    $f = $FL_STARTUPALERT;
2320
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"startupalerts"};
2321
		} elsif ($cmd eq "upalert") {
2322
		    $f = $FL_UPALERT;
2323
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"upalerts"};
2324
		} elsif ($cmd eq "ackalert") {
2325
		    $f = $FL_ACKALERT;
2326
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"ackalerts"};
2327
		} elsif ($cmd eq "disablealert") {
2328
		    $f = $FL_DISABLEALERT;
2329
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"disablealerts"};
2330
		}
2331

    
2332
		for (@{$a}) {
2333
		    my ($alert, $args) = split (/\s+/, $_, 2);
2334

    
2335
		    if ($args =~ /^exit=/) {
2336
		    	$args =~ s/^exit=\S+ \s+//x;
2337
		    }
2338

    
2339
		    call_alert (
2340
			group	=> $group,
2341
			service	=> $service,
2342
			output	=> "test\ntest detail\n",
2343
			retval	=> $retval,
2344
			flags	=> $f | $FL_TEST,
2345
			alert	=> $alert,
2346
			args	=> $args,
2347
		    );
2348
		}
2349

    
2350
		sock_write ($fh,  "220 test alert completed\n");
2351
	    }
2352

    
2353
	#
2354
        # test config file
2355
        #
2356
        } elsif ($cmd =~ /^config$/) {
2357
	    if ((my $err = read_cf ($CF{"CF"}, 0))  ne "") {
2358
		sock_write ($fh,  $err);
2359
		sock_write ($fh,  "\n520 test config completed, errors found in config file\n");
2360
	    }
2361

    
2362
	    else
2363
	    {
2364
		sock_write ($fh,  "220 test config completed OK, no errors found\n");
2365
	    }
2366

    
2367
	} else {
2368
	    sock_write ($fh,  "520 test error\n");
2369
	}
2370

    
2371
    #
2372
    # version
2373
    #
2374
    } elsif ($cmd eq "version") {
2375
    	sock_write ($fh, "version " . int ($PROT_VERSION) . "\n");
2376
    	sock_write ($fh, "220 version completed\n");
2377

    
2378
    #
2379
    # load state
2380
    #
2381
    } elsif ($cmd eq "loadstate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2382
	foreach (split (/\s+/, $args)) {
2383
	    load_state ($_);
2384
	}
2385
	sock_write ($fh,  "220 loadstate completed\n");
2386

    
2387
    #
2388
    # save state
2389
    #
2390
    } elsif ($cmd eq "savestate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2391
	if ($args =~ /\S/)
2392
	{
2393
	    foreach (split (/\s+/, $args))
2394
	    {
2395
		save_state ($_);
2396
	    }
2397
	    sock_write ($fh,  "220 savestate completed\n");
2398
	}
2399

    
2400
	else
2401
	{
2402
	    sock_write ($fh,  "520 savestate error, arguments required\n");
2403
	}
2404

    
2405
    #
2406
    # term
2407
    #
2408
    } elsif ($cmd eq "term"  && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2409
	sock_write ($fh,  "220 terminating server\n");
2410
	client_close ($cl, "terminated by user command");
2411
	syslog ("info", "terminating by user command");
2412
	exit;
2413

    
2414
    #
2415
    # stop testing
2416
    #
2417
    } elsif ($cmd eq "stop"&& check_auth ($clients{$cl}->{"user"}, $cmd)) {
2418
	$STOPPED = 1;
2419
	$STOPPED_TIME = time;
2420
	sock_write ($fh,  "220 stop completed\n");
2421

    
2422
    #
2423
    # start testing
2424
    #
2425
    } elsif ($cmd eq "start" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2426
	$STOPPED = 0;
2427
	$STOPPED_TIME = 0;
2428
	sock_write ($fh,  "220 start completed\n");
2429

    
2430
    } elsif ($cmd eq "setview") {
2431
        my @args=split /\s+/, $args;
2432
        if (@args > 1) {
2433
            sock_write($fh, "500 Unknown setview command\n")
2434
        } elsif (@args == 1) {
2435
            if (defined($views{$args[0]})) {
2436
                $clients{$cl}->{"view"} = $args[0];
2437
                sock_write($fh, "selecting view $args[0]\n");
2438
                sock_write($fh, "220 setview completed\n")
2439
            } else {
2440
                sock_write($fh, "504 unknown view $args[0]\n");
2441
            }
2442
        } else {
2443
            delete $clients{$cl}->{"view"};
2444
            sock_write($fh, "no view selected -- all groups will be displayed\n");
2445
            sock_write($fh, "220 setview completed\n")
2446
        }
2447
    } elsif ($cmd eq "getview") {
2448
        if ($clients{$cl}->{"view"}) {
2449
            sock_write($fh, "view ".$clients{$cl}->{"view"}. " selected\n");
2450
        } else {
2451
            sock_write($fh, "no view selected -- all groups will be displayed\n");
2452
      }
2453
      sock_write($fh, "220 getview completed\n")
2454
    #
2455
    # set
2456
    #
2457
    } elsif ($cmd eq "set" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2458
	if ($args =~ /^maxkeep\s+(\d+)/) {
2459
	    $CF{"MAX_KEEP"} = $1;
2460
	    sock_write ($fh,  "220 set completed\n");
2461
	} else {
2462
	    ($group, $service, $var, $value) = split (/\s+/, $args, 4);
2463
	    if (!defined $watch{$group}->{$service}) {
2464
		sock_write ($fh,  "520 $group,$service not defined\n");
2465
	    } elsif ($var eq "opstatus") {
2466
		if (!defined ($OPSTAT{$value})) {
2467
		    sock_write ($fh,  "520 undefined opstatus\n");
2468
		} else {
2469
		    set_op_status ($group, $service,
2470
		    	un_esc_str ((parse_line ('\s+', 0, $value))[0]));
2471
		    sock_write ($fh,  "220 set completed\n");
2472
		}
2473

    
2474
	    } else {
2475
		$value = un_esc_str ((parse_line ('\s+', 0, $value))[0]);
2476
		$watch{$group}->{$service}->{$var} = $value;
2477
		sock_write ($fh,  "$group $service $var='$value'\n");
2478
		sock_write ($fh,  "220 set completed\n");
2479
	    }
2480
	}
2481

    
2482
    #
2483
    # get
2484
    #
2485
    } elsif ($cmd eq "get" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2486
	if ($args =~ /^maxkeep\s*$/) {
2487
	    sock_write ($fh,  "maxkeep = $CF{MAX_KEEP}\n");
2488
	    sock_write ($fh,  "220 set completed\n");
2489
	} else {
2490
	    ($group, $service, $var) = split (/\s+/, $args, 3);
2491
	    if (!defined $watch{$group}->{$service}) {
2492
		sock_write ($fh,  "520 $group,$service not defined\n");
2493
	    } else {
2494
		sock_write ($fh,  "$group $service $var='" .
2495
			esc_str ($watch{$group}->{$service}->{$var}, 1) . "'\n");
2496
		sock_write ($fh,  "220 get completed\n");
2497
	    }
2498
	}
2499

    
2500
    #
2501
    # list
2502
    #
2503
    } elsif ($cmd eq "list" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2504
	@argsList = split(/\s+/, $args);
2505
	($cmd, $args) = split (/\s+/, $args, 2);
2506

    
2507
	#
2508
	# list service descriptions
2509
	#
2510
	if ($cmd eq "descriptions") {
2511
	    foreach $group (keys %watch) {
2512
		foreach $service (keys %{$watch{$group}}) {
2513
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2514
                        sock_write ($fh,  "$group $service " .
2515
                                    esc_str ($watch{$group}->{$service}->{"description"}, 1) .
2516
                                    "\n");
2517
                    }
2518
		}
2519
	    }
2520
	    sock_write ($fh,  "220 list descriptions completed\n");
2521

    
2522
	#
2523
	# list group members
2524
	#
2525
	} elsif ($cmd eq "group") {
2526
	    if ($groups{$args}) {
2527
		sock_write ($fh,  "hostgroup $args @{$groups{$args}}\n");
2528
		sock_write ($fh,  "220 list group completed\n");
2529
	    } else {
2530
		sock_write ($fh,  "520 list group error, undefined group\n");
2531
	    }
2532

    
2533
	#
2534
	# list status of all services
2535
	#
2536
	} elsif ($cmd eq "opstatus") {
2537
	    if (!defined $args || $args eq "")
2538
	    {
2539
		foreach $group (keys %watch) {
2540
		    foreach $service (keys %{$watch{$group}}) {
2541
                        if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2542
                            client_write_opstatus ($fh, $group, $service);
2543
                        }
2544
		    }
2545
		}
2546
		sock_write ($fh,  "220 list opstatus completed\n");
2547
	    }
2548

    
2549
	    else
2550
	    {
2551
	    	my $err = 0;
2552
		my @g = ();
2553
		my ($group, $service);
2554

    
2555
		foreach my $gs (split (/\s+/, $args))
2556
		{
2557
		    ($group, $service) = split (/,/, $gs);
2558
		    $err++ && last if ($service ne "" && !defined $watch{$group}->{$service});
2559
		    push (@g, [$group, $service]);
2560
		}
2561

    
2562
		if (!$err)
2563
		{
2564
		    foreach my $gs (@g)
2565
		    {
2566
			if ($gs->[1] ne "") {
2567
			    client_write_opstatus ($fh, $gs->[0], $gs->[1]);
2568
			} else {
2569
			    foreach $service (keys %{$watch{$gs->[0]}}) {
2570
				client_write_opstatus ($fh, $gs->[0], $service);
2571
			    }
2572
			}
2573
		    }
2574
		    sock_write ($fh,  "220 list opstatus completed\n");
2575
		}
2576

    
2577
		else
2578
		{
2579
		    sock_write ($fh,  "520 $group,$service does not exist\n");
2580
		}
2581
	    }
2582

    
2583
	#
2584
	# list disabled hosts and services
2585
	#
2586
	} elsif ($cmd eq "disabled") {
2587
	    foreach $group (keys %groups) {
2588
                if (view_match($clients{$cl}->{"view"}, $group, undef)) {
2589
                    @l = grep (/^\*/, @{$groups{$group}});
2590
                    if (@l) {
2591
                        grep (s/^\*//, @l);
2592
                        sock_write ($fh,  "group $group: @l\n");
2593
                    }
2594
                }
2595
	    }
2596
	    foreach $group (keys %watch) {
2597
                if (view_match($clients{$cl}->{"view"}, $group, undef)) {
2598
                    if (exists $watch_disabled{$group} && $watch_disabled{$group} == 1) {
2599
                        sock_write ($fh,  "watch $group\n");
2600
                    }
2601
                }
2602
		foreach $service (keys %{$watch{$group}}) {
2603
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2604
                        if (defined $watch{$group}->{$service}->{'disable'} 
2605
                            && $watch{$group}->{$service}->{'disable'} == 1) {
2606
                            sock_write ($fh,  "watch $group service " .
2607
                                        "$service\n");
2608
                        }
2609
                    }
2610
                }
2611
            }
2612
            sock_write ($fh,  "220 list disabled completed\n");
2613

    
2614
	#
2615
	# list last alert history
2616
	#
2617
	} elsif ($cmd eq "alerthist") {
2618
	    foreach my $l (@last_alerts)
2619
	    {
2620
		sock_write ($fh,  esc_str ($l) . "\n");
2621
	    }
2622
	    sock_write ($fh,  "220 list alerthist completed\n");
2623

    
2624
	#
2625
	# list time of last failures for each service
2626
	#
2627
	} elsif ($cmd eq "failures") {
2628
	    foreach $group (keys %watch) {
2629
		foreach $service (keys %{$watch{$group}}) {
2630
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2631
                        my $sref = \%{$watch{$group}->{$service}};
2632
                        client_write_opstatus ($fh, $group, $service)
2633
                            if ($FAILURE{$sref->{"_op_status"}});
2634
                    }
2635
		}
2636
	    }
2637
	    sock_write ($fh,  "220 list failures completed\n");
2638

    
2639
	#
2640
	# list the failure history
2641
	#
2642
	} elsif ($cmd eq "failurehist") {
2643
	    foreach my $l (@last_failures)
2644
	    {
2645
		sock_write ($fh, esc_str ($l) . "\n");
2646
	    }
2647
	    sock_write ($fh,  "220 list failurehist completed\n");
2648

    
2649
	#
2650
	# list the time of last successes for each service
2651
	#
2652
	} elsif ($cmd eq "successes") {
2653
	    foreach $group (keys %watch) {
2654
		foreach $service (keys %{$watch{$group}}) {
2655
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2656
                        my $sref = \%{$watch{$group}->{$service}};
2657
                        client_write_opstatus ($fh, $group, $service)
2658
                            if ($SUCCESS{$sref->{"_op_status"}});
2659
                    }
2660
		}
2661
	    }
2662
	    sock_write ($fh,  "220 list successes completed\n");
2663

    
2664
	#
2665
	# list warnings
2666
	#
2667
	} elsif ($cmd eq "warnings") {
2668
	    foreach $group (keys %watch) {
2669
		foreach $service (keys %{$watch{$group}}) {
2670
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2671
                        my $sref = \%{$watch{$group}->{$service}};
2672
                        client_write_opstatus ($fh, $group, $service)
2673
                            if ($WARNING{$sref->{"_op_status"}});
2674
                    }
2675
		}
2676
	    }
2677
	    sock_write ($fh,  "220 list successes completed\n");
2678

    
2679
	#
2680
	# list process IDs
2681
	#
2682
	} elsif ($cmd eq "pids") {
2683
	    sock_write ($fh,  "server $$\n");
2684
	    foreach $value (keys %runningpid) {
2685
		($group, $service) = split (/\//, $runningpid{$value});
2686
		sock_write ($fh,  "$group $service $value\n");
2687
	    }
2688
	    sock_write ($fh,  "220 list pids completed\n");
2689

    
2690
	#
2691
	# list watch groups and services
2692
	#
2693
	} elsif ($cmd eq "watch") {
2694
	    foreach $group (keys %watch) {
2695
		foreach $service (keys %{$watch{$group}}) {
2696
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2697
                        if (!defined $watch{$group}->{$service}) {
2698
                            sock_write ($fh,  "$group (undefined service)\n");
2699
                        } else {
2700
                            sock_write ($fh,  "$group $service\n");
2701
                        }
2702
                    }
2703
		}
2704
	    }
2705
	    sock_write ($fh,  "220 list watch completed\n");
2706

    
2707
	#
2708
	# list server state
2709
	#
2710
	} elsif ($cmd eq "state") {
2711
	    if ($STOPPED) {
2712
		sock_write ($fh,  "scheduler stopped since $STOPPED_TIME\n");
2713
	    } else {
2714
		sock_write ($fh,  "scheduler running\n");
2715
	    }
2716
	    sock_write ($fh,  "220 list state completed\n");
2717

    
2718
	#
2719
	# list aliases
2720
	#
2721
	} elsif ($cmd eq "aliases") {
2722
	    my (@listAliasesRequest) = @argsList;
2723

    
2724
	    shift (@listAliasesRequest);
2725

    
2726
	    # if no alias request, all alias are responded
2727
	    unless (@listAliasesRequest) {
2728
	    	@listAliasesRequest = keys (%alias);
2729
	    }
2730

    
2731
	    foreach my $alias (@listAliasesRequest){
2732
	    	sock_write ($fh, "alias $alias\n");
2733
		foreach $value (@{$alias{$alias}}) {
2734
		    sock_write ($fh,  "$value\n");
2735
		}
2736
		sock_write ($fh, "\n");
2737
	    }
2738
	    sock_write ($fh,  "220 list aliases completed\n");
2739

    
2740
	#
2741
	# list aliasgroups
2742
	#
2743
	} elsif ($cmd eq "aliasgroups") {
2744
	    my (@listAliasesRequest);
2745
	    @listAliasesRequest = keys (%alias);
2746

    
2747
	    sock_write ($fh,  "@listAliasesRequest\n")
2748
	    	unless (@listAliasesRequest == 0);
2749
	    sock_write ($fh,  "220 list aliasgroups completed\n");
2750

    
2751
	#
2752
	# list deps
2753
	#
2754
	} elsif ($cmd eq "deps") {
2755
	    foreach my $g (keys %watch) {
2756
	    	foreach my $s (keys %{$watch{$g}}) {
2757
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
2758
                        my $sref = \%{$watch{$g}->{$s}};
2759
                        if ($sref->{"depend"} ne "") {
2760
                            sock_write ($fh, "exp $g $s '" .
2761
                                        esc_str ($sref->{"depend"}, 1) . "'\n");
2762
                        } else {
2763
                            sock_write ($fh, "exp $g $s 'NONE'\n");
2764
                        }
2765
                        my @u =
2766
                            ($sref->{"depend"} =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g);
2767
                        if (@u) {
2768
                            sock_write ($fh, "cmp $g $s @u\n");
2769
                        } else {
2770
                            sock_write ($fh, "cmp $g $s NONE\n");
2771
                        }
2772
                    }
2773
                }
2774
	    }
2775

    
2776
	    sock_write ($fh,  "220 list deps completed\n");
2777

    
2778
	#
2779
	# downtime log
2780
	#
2781
	} elsif ($cmd eq "dtlog") {
2782
	    if ($CF{"DTLOGGING"}) {
2783
	    	if (!open (DTLOGTMP, "<  $CF{DTLOGFILE}")) {
2784
		    sock_write ($fh, "520 list dtlog error, cannot open dtlog\n");
2785

    
2786
		} else {
2787
		    while (<DTLOGTMP>) {
2788
		    	sock_write ($fh, $_ ) if (!/^#/ && !/^\s*$/);
2789
		    }
2790

    
2791
		    close (DTLOGTMP);
2792

    
2793
		    sock_write ($fh, "220 list dtlog completed\n");
2794
		}
2795

    
2796
	    } else {
2797
	    	sock_write ($fh, "520 list dtlog error, dtlogging is not turned on\n");
2798
	    }
2799

    
2800
	#
2801
	# list available views
2802
	#
2803
	} elsif ($cmd eq "views") {
2804
	    sock_write ($fh,  "views ".join(' ',sort(keys %views))."\n");
2805
	    sock_write ($fh,  "220 list group completed\n");
2806

    
2807

    
2808
        # unknown list command
2809
	} else {
2810
	    sock_write ($fh,  "520 unknown list command\n");
2811
	}
2812

    
2813
    #
2814
    # acknowledge a failure
2815
    #
2816
    } elsif ($cmd eq "ack" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2817
	my ($group, $service, $comment) = split (/\s+/, $args, 3);
2818

    
2819
	if (!defined ($watch{$group})) {
2820
	    sock_write ($fh,  "520 unknown group\n");
2821

    
2822
	} elsif (!defined $watch{$group}->{$service}) {
2823
	    sock_write ($fh,  "520 unknown service\n");
2824
	}
2825

    
2826
	my $sref = \%{$watch{$group}->{$service}};
2827

    
2828
	if ($sref->{"_op_status"} == $STAT_OK ||
2829
		  $sref->{"_op_status"} == $STAT_UNTESTED) {
2830
	    sock_write ($fh,  "520 service is in a non-failure state\n");
2831

    
2832
	} else {
2833
	    $sref->{"_ack"} = time;
2834
            $sref->{"_ack_comment"} = $clients{$cl}->{"user"} . ": " .
2835
		    un_esc_str ((parse_line ('\s+', 0, $comment))[0]);
2836
	    sock_write ($fh,  "220 ack completed\n");
2837
 	    do_alert($group, $service, $sref->{"_ack_comment"}, undef, $FL_ACKALERT)
2838
	}
2839

    
2840
    #
2841
    # disable watch, service or host
2842
    #
2843
    } elsif ($cmd eq "disable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2844
	($cmd, $args) = split (/\s+/, $args, 2);
2845

    
2846
	#
2847
	# disable watch
2848
	#
2849
	if ($cmd eq "watch") {
2850
	    if (!defined (disen_watch($args, 0))) {
2851
		sock_write ($fh,  "520 disable error, unknown watch \"$args\"\n");
2852
	    } else {
2853
		$stchanged++;
2854
		mysystem("$CF{MONREMOTE} disable watch $args") if ($CF{MONREMOTE});
2855
		sock_write ($fh,  "220 disable watch completed\n");
2856
	    }
2857

    
2858
	#
2859
	# disable service
2860
	#
2861
	} elsif ($cmd eq "service") {
2862
	    ($group, $service) = split (/\s+/, $args, 2);
2863

    
2864
	    if (!defined (disen_service ($group, $service, 0))) {
2865
		sock_write ($fh,  "520 disable error, unknown service\n");
2866
	    } else {
2867
		$stchanged++;
2868
		mysystem("$CF{MONREMOTE} disable service $group $service") if ($CF{MONREMOTE});
2869
		sock_write ($fh,  "220 disable service completed\n");
2870
		do_alert($group, $service, $clients{$cl}->{"user"}, undef, $FL_DISABLEALERT)
2871
	    }
2872

    
2873
	#
2874
	# disable host
2875
	#
2876
	} elsif ($cmd eq "host") {
2877
	    my @notfound = ();
2878

    
2879
	    my @hosts = split (/\s+/, $args);
2880

    
2881
	    foreach my $h (@hosts)
2882
	    {
2883
	    	if (!host_exists ($h))
2884
		{
2885
		    push @notfound, $h;
2886
		}
2887
	    }
2888

    
2889
	    if (@notfound)
2890
	    {
2891
	    	sock_write ($fh, "520 disable host failed, host(s) @notfound do not exist\n");
2892
	    }
2893

    
2894
	    else
2895
	    {
2896
		foreach my $h (@hosts)
2897
		{
2898
		    #
2899
		    # disable a watch if there is a group with this host
2900
		    # as its only member. this prevents warning messages
2901
		    # about monitors not being run on empty host groups
2902
		    #
2903
                    foreach my $g (host_singleton_group($h)) {
2904
                        disen_watch($g, 0);
2905
			mysystem("$CF{MONREMOTE} disable watch $g") if ($CF{MONREMOTE});
2906
                    }
2907

    
2908
		    disen_host ($h, 0);
2909
		    $stchanged++;
2910
		    mysystem("$CF{MONREMOTE} disable host $h") if ($CF{MONREMOTE});
2911
                }
2912
                sock_write ($fh, "220 disable host completed\n");
2913
	    }
2914

    
2915
	} else {
2916
	    sock_write ($fh,  "520 command could not be executed\n");
2917
	}
2918

    
2919
    #
2920
    # enable watch, service or host
2921
    #
2922
    } elsif ($cmd eq "enable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2923
	($cmd, $args) = split (/\s+/, $args, 2);
2924

    
2925
	#
2926
	# enable watch
2927
	#
2928
	if ($cmd eq "watch") {
2929
	    if (!defined (disen_watch ($args, 1))) {
2930
		sock_write ($fh,  "520 enable error, unknown watch\n");
2931
	    } else {
2932
		$stchanged++;
2933
		mysystem("$CF{MONREMOTE} enable watch $args") if ($CF{MONREMOTE});
2934
		sock_write ($fh,  "220 enable watch completed\n");
2935
	    }
2936

    
2937

    
2938
	#
2939
	# enable service
2940
	#
2941
	} elsif ($cmd eq "service") {
2942
	    ($group, $service) = split (/\s+/, $args, 2);
2943

    
2944
	    if (!defined (disen_service ($group, $service, 1))) {
2945
		sock_write ($fh,  "520 enable error, unknown group\n");
2946
	    } else {
2947
		$stchanged++;
2948
		mysystem("$CF{MONREMOTE} enable service $group $service") if ($CF{MONREMOTE});
2949
		sock_write ($fh,  "220 enable completed\n");
2950
	    }
2951

    
2952
	#
2953
	# enable host
2954
	#
2955
	} elsif ($cmd eq "host") {
2956
	    foreach my $h (split (/\s+/, $args)) {
2957
                foreach my $g (host_singleton_group($h)) {
2958
                    disen_watch($g, 1);
2959
		    mysystem("$CF{MONREMOTE} enable watch $g") if ($CF{MONREMOTE});
2960
                }
2961

    
2962
		disen_host ($h, 1);
2963
		mysystem("$CF{MONREMOTE} enable host $h") if ($CF{MONREMOTE});
2964
		$stchanged++;
2965
	    }
2966
	    sock_write ($fh,  "220 enable completed\n");
2967

    
2968
	} else {
2969
	    sock_write ($fh,  "520 command could not be executed\n");
2970
	}
2971

    
2972
    #
2973
    # server time
2974
    #
2975
    } elsif ($cmd eq "servertime" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
2976
	sock_write ($fh,  join ("", time, " ", scalar (localtime), "\n"));
2977
	sock_write ($fh,  "220 servertime completed\n");
2978

    
2979
    #
2980
    # check auth
2981
    #
2982
    } elsif ($cmd eq "checkauth") {
2983
	@_ = split(' ',$args);
2984
	$cmd = $_[0];
2985
	$user = $clients{$cl}->{"user"};
2986
	#  Note that we call check_auth without syslogging here.
2987
	if (check_auth($clients{$cl}->{"user"}, $cmd, 1))
2988
	{
2989
	    sock_write ($fh, "220 command authorized\n");
2990
	}
2991

    
2992
	else
2993
	{
2994
	    sock_write ($fh, "520 command could not be executed\n");
2995
	}
2996

    
2997

    
2998
    } else {
2999
	sock_write ($fh,  "520 command could not be executed, unknown command\n");
3000
    }
3001

    
3002
    save_state ("disabled") if ($stchanged);
3003
    syslog ('info', "finished client command \"$l\"")
3004
	if ($l !~ /^\s*login/i);
3005

    
3006
}
3007

    
3008

    
3009
#
# Write a single "key=value key=value ..." status line for one
# group/service pair to a client.
#
# Arguments:
#   $fh       - handle passed through to sock_write
#   $group    - key into %watch
#   $service  - key into $watch{$group}
#
# The line always starts with the fixed set of fields (group, service,
# opstatus ... last_summary, last_detail) in that order; the optional
# fields (last_failure, interval, exclude_period, exclude_hosts,
# randskew, last_alert, first_failure) are appended only when the
# corresponding value is set.
#
# Every value is handed to sprintf as a %s argument instead of being
# interpolated into the format string, so a literal "%" in a group
# name, an acknowledgement comment or a dependency expression cannot be
# mistaken for a conversion specifier and shift the remaining fields.
#
sub client_write_opstatus {
    my $fh = shift;
    my ($group, $service) = @_;

    my $sref = \%{$watch{$group}->{$service}};

    my $summary       = esc_str ($sref->{"_last_summary"}, 1);
    my $detail        = esc_str ($sref->{"_last_detail"}, 1);
    my $depend        = esc_str ($sref->{"depend"}, 1);
    my $hostdepend    = esc_str ($sref->{"hostdepend"}, 1);
    my $monitordepend = esc_str ($sref->{"monitordepend"}, 1);
    my $alertdepend   = esc_str ($sref->{"alertdepend"}, 1);
    my $monitor       = esc_str ($sref->{"monitor"}, 1);

    # the ack comment is only reported while an acknowledgement is active
    my $comment;
    if ($sref->{"_ack"} != 0) {
        $comment = esc_str ($sref->{"_ack_comment"}, 1);
    } else {
        $comment = '';
    }

    #
    # total the alerts sent over all periods (skipping periods flagged
    # with alerts_dont_count) and find the most recent alert time
    #
    my $alerts_sent = 0;
    my $l = 0;
    foreach my $period (keys %{$sref->{"periods"}})
    {
        my $pref = $sref->{"periods"}->{$period};

        $alerts_sent += $pref->{"_alert_sent"}
            if (!defined ($pref->{"alerts_dont_count"}));

        $l = $pref->{"_last_alert"}
            if (defined $pref->{"_last_alert"} && $pref->{"_last_alert"} > $l);
    }

    # undefined state values are reported as an empty string
    my $or_empty = sub { defined $_[0] ? $_[0] : "" };

    my $buf = sprintf (
        "group=%s service=%s opstatus=%s last_opstatus=%s exitval=%s" .
        " timer=%s last_success=%s last_trap=%s last_traphost=%s" .
        " last_check=%s ack=%s ackcomment=%s alerts_sent=%s depstatus=%s" .
        " depend=%s hostdepend=%s monitordepend=%s alertdepend=%s" .
        " monitor=%s last_summary=%s last_detail=%s",
        $group,
        $service,
        $sref->{"_op_status"},
        $or_empty->($sref->{"_last_op_status"}),
        $or_empty->($sref->{"_exitval"}),
        $or_empty->($sref->{"_timer"}),
        $or_empty->($sref->{"_last_success"}),
        $or_empty->($sref->{"_last_trap"}),
        $or_empty->($sref->{"_last_traphost"}),
        $or_empty->($sref->{"_last_check"}),
        $or_empty->($sref->{"_ack"}),
        $comment,
        $alerts_sent,
        (defined $sref->{"_depend_status"} ? int ($sref->{"_depend_status"}) : ""),
        $depend,
        $hostdepend,
        $monitordepend,
        $alertdepend,
        $monitor,
        $summary,
        $detail,
    );

    $buf .= " last_failure=$sref->{_last_failure}"
        if ($sref->{"_last_failure"});

    if ($sref->{"interval"})
    {
        $buf .= " interval=$sref->{interval}" .
            " monitor_duration=$sref->{_monitor_duration}" .
            " monitor_running=$sref->{_monitor_running}"
    }

    $buf .= " exclude_period=" . esc_str ($sref->{exclude_period})
        if ($sref->{"exclude_period"} ne "");

    $buf .= " exclude_hosts=" .
            esc_str (join (" ", keys %{$sref->{exclude_hosts}}))
        if (keys %{$sref->{"exclude_hosts"}});

    $buf .= " randskew=$sref->{randskew}"
        if ($sref->{"randskew"});

    $buf .= " last_alert=$l"
        if ($l);

    if ($sref->{"_first_failure"})
    {
        # how long the service has been failing, in seconds
        my $t = time - $sref->{"_first_failure"};

        $buf .= " first_failure=$sref->{_first_failure}" .
                " failure_duration=$t";
    }

#    if ($sref->{"_first_success"})
#    {
#	my $t = time - $sref->{"_first_success"};
#
#    	$buf .= " first_success=$sref->{_first_success}" .
#		" success_duration=$t";
#    }

    $buf .= "\n";

    sock_write ($fh, $buf);
}
3085

    
3086

    
3087
#
3088
# show usage
3089
#
3090
#
# Print the command-line synopsis, the per-option help, the bug
# report address ($AUTHOR) and the revision string ($RCSID) to the
# currently selected output handle.
#
sub usage {
    my $help = <<"EOF";
usage: mon [-a dir] [-A file] [-b dir] [-B dir] [-c config] [-d]
           [-D dir] [-f] [-h] [-i secs] [-k num] [-l [type]] [-L dir]
           [-M [path]] [-m num] [-p num] [-P file] [-r num] [-s dir]
           [-S] [-t num]
       mon -v

  -a dir	alert script dir
  -A file	authorization file
  -b dir	base directory for alerts and monitors (basedir)
  -B dir	base directory for configuration files (cfbasedir)
  -c config	config file, defaults to "mon.cf"
  -d		debug
  -D dir	state directory (statedir)
  -f		fork and become a daemon
  -h		this help
  -i secs	sleep interval (seconds), defaults to 1
  -k num	keep history of last num events
  -l [type]	load some types of old state from statedir.  type can
                be disabled (default), opstatus or all.
  -L dir	log directory (logdir)
  -M [path]	pre-process config file with m4.  if m4 isn't in \$PATH
                specify the path to m4 here
  -m num	throttle at maximum number of monitor processes
  -O facility	syslog facility to use
  -p num	server listens on port num
  -P file	PID file
  -r num	randomize startup schedule
  -s dir	monitor script dir
  -S		start with scheduler stopped
  -t port	trap port
  -v		print version

Report bugs to $AUTHOR
$RCSID
EOF

    print $help;
}
3128

    
3129

    
3130
#
3131
# become a daemon
3132
#
3133
#
# Detach from the invoking terminal and run in the background.
#
# Forks twice with a setsid() in between (the intermediate parents
# exit 0), sets the umask to 022, opens $CF{"MONERRFILE"} for append
# on handle N, reopens STDIN from /dev/null and points STDOUT and
# STDERR at N.  A failed first fork dies; every later failure is
# logged with syslog and the process exits with status 1.
#
# The error file and /dev/null are opened with the three-argument
# form of open so that the configured file name is taken literally
# and is never parsed for mode characters or surrounding whitespace.
#
sub daemon {
    my $pid;

    if ($pid = fork()) {
        # the parent goes away all happy and stuff
        exit (0);
    } elsif (!defined $pid) {
        die "could not fork: $!\n";
    }

    setsid();

    #
    # make it so that we cannot regain a controlling terminal
    #
    if ($pid = fork()) {
        # the parent goes away all happy and stuff
        exit (0);
    } elsif (!defined $pid) {
        syslog ('err', "could not fork: $!");
        exit 1;
    }

#    chdir ('/');
    umask (022);

    if (!open (N, "+>>", $CF{"MONERRFILE"}))
    {
        syslog ("err", "could not open error output file $CF{'MONERRFILE'}: $!");
        exit (1);
    }

    # unbuffer the error file, then restore STDOUT as the default handle
    select (N);
    $| = 1;
    select (STDOUT);

    if (!open (STDIN, "<", "/dev/null"))
    {
        syslog ("err", "could not open STDIN from /dev/null: $!");
        exit (1);
    }

    print N "Mon starting at ".localtime(time)."\n";

    # from here on anything written to STDOUT or STDERR lands in the error file
    if (!open (STDOUT, ">&N") ||
        !open (STDERR, ">&N")) {
        syslog ("err", "could not redirect: $!");
        exit (1);
    }
    syslog ('info', "running as daemon");
}
3183

    
3184

    
3185
#
3186
# debug
3187
#
3188
sub debug {
    my ($level, @l) = @_;

    #
    # Emit a debugging message.
    #
    #   $level  verbosity of this message; nothing is emitted unless
    #           $opt{"d"} is defined and $level does not exceed it
    #   @l      the pieces of the message
    #
    # The message is printed on STDERR when $opt{"d"} is true and
    # $opt{"f"} is not set; otherwise it is sent to syslog at priority
    # "debug".
    #

    return if (!defined $opt{"d"} || $level > $opt{"d"});

    if ($opt{"d"} && !$opt{"f"}) {
        print STDERR @l;
    } else {
        # syslog() treats its second argument as a sprintf-style format,
        # so the text is passed as data for a fixed "%s" format; a "%"
        # inside the message must not be interpreted
        syslog ('debug', '%s', join ('', @l));
    }
}
3199

    
3200

    
3201
#
3202
# die_die
3203
#
3204
sub die_die {
    my ($level, $msg) = @_;

    #
    # Terminate the program because of a fatal error.
    #
    #   $level  syslog priority used for the report
    #   $msg    description of the problem
    #
    # When $opt{"d"} is true this dies with "[$level] $msg"; otherwise
    # the message is logged through syslog, the log is closed and the
    # process exits with status 1.  Does not return.
    #

    die "[$level] $msg\n" if ($opt{"d"});

    # the message is data, not a format: a literal "%" in $msg must not
    # be expanded by syslog's sprintf processing
    syslog ($level, '%s', "fatal, $msg");
    closelog();
    exit (1);
}
3213

    
3214

    
3215
#
3216
# handle cleanup of exited processes
3217
# trigger alerts on failures (or send no alert if disabled)
3218
# do some accounting
3219
#
3220
sub proc_cleanup {
    #
    # Reap every exited child without blocking.  For each child that is
    # recorded in %runningpid: read whatever output is still pending on
    # its file handle, record how long the monitor ran, pass the exit
    # value and collected output to process_event(), reset the service
    # timer and remove the process from the bookkeeping tables.
    #
    my $tmnow = time;

    return if (keys %running == 0);

    while ((my $p = waitpid (-1, &WNOHANG)) > 0)
    {
        # take the exit value immediately, before any other call gets
        # a chance to overwrite $?
        my $exitval = int ($? >> 8);

        next if (!exists $runningpid{$p});

        my $id = $runningpid{$p};
        my ($group, $service) = split (/\//, $id);
        my $sref = \%{$watch{$group}->{$service}};

        #
        # suck in any extra data
        #
        my $fh = $fhandles{$id};
        my $buf;
        while (sysread ($fh, $buf, 8192))
        {
            $ibufs{$id} .= $buf;
        }

        debug (1, "PID $p ($id) exited with [$exitval]\n");

        $sref->{"_monitor_duration"} = $tmnow - $sref->{"_last_check"};
        $sref->{"_monitor_running"} = 0;

        process_event ("m", $group, $service, $exitval, $ibufs{$id});

        reset_timer ($group, $service);

        remove_proc ($p);
    }
}
3254

    
3255

    
3256
#
3257
# handle the event where a monitor exits or a trap is received
3258
#
3259
# $type is "m"  for monitor, "t" for trap
3260
#
3261
sub process_event {
    my ($type, $group, $service, $exitval, $output) = @_;

    #
    # Record the outcome of one check of $group/$service and send the
    # alerts that outcome calls for.
    #
    #   $type     "m" for a monitor exit, "t" for a trap, "T" for a
    #             trap timeout
    #   $group    watch group, first key into %watch
    #   $service  service within that group
    #   $exitval  false means success, true means failure
    #   $output   text produced by the check; the first line is kept as
    #             the summary and the remainder as the detail
    #

    debug (1, "process_event type=$type group=$group service=$service exitval=$exitval output=[$output]\n");

    my $sref = \%{$watch{$group}->{$service}};
    my $tmnow = time;

    my ($summary, $detail) = split("\n", $output, 2);

    $sref->{"_exitval"} = $exitval;

    if ($sref->{"depend"} ne "" &&
            $sref->{"dep_behavior"} eq "a")
    {
        dep_ok ($sref, 'a');
    }

    #
    # error exit value
    #
    if ($exitval)
    {
        #
        # accounting
        #
        $sref->{"_failure_count"}++;
        $sref->{"_consec_failures"}++;
        $sref->{"_last_failure"} = $tmnow;
        if ($sref->{"_op_status"} == $STAT_OK ||
                $sref->{"_op_status"} == $STAT_UNKNOWN ||
                $sref->{"_op_status"} == $STAT_UNTESTED)
        {
            $sref->{"_first_failure"} = $tmnow;
        }
        set_op_status ($group, $service, $STAT_FAIL);

        $summary = "(NO SUMMARY)" if ($summary =~ /^\s*$/m);
        $sref->{"_last_summary"} = $summary;
        $sref->{"_last_detail"} = $detail;

        # the failure history is stamped with the time of this event;
        # the previous code interpolated $tm, which this sub never sets
        shift @last_failures if (@last_failures > $CF{"MAX_KEEP"});
        push @last_failures, "$group $service $tmnow $summary";

        # the summary comes from the monitor, so hand it to syslog as
        # data rather than as a sprintf format
        syslog ('crit', '%s', "failure for $last_failures[-1]");

        #
        # send an alert if necessary
        #
        if ($type eq "m")
        {
            do_alert ($group, $service, $output, $exitval, $FL_MONITOR);

            #
            # change interval if needed
            #
            if (defined ($sref->{"failure_interval"}) &&
                    !defined $sref->{"_old_interval"})
            {
                $sref->{"_old_interval"} = $sref->{"interval"};
                $sref->{"interval"} = $sref->{"failure_interval"};
                $sref->{"_next_check"} = 0;
            }
        }

        elsif ($type eq "t")
        {
            do_alert ($group, $service, $output, $exitval, $FL_TRAP);
        }

        elsif ($type eq "T")
        {
            do_alert ($group, $service, $output, $exitval, $FL_TRAPTIMEOUT);
        }

        $sref->{"_failure_output"} = $output;
    }

    #
    # success exit value
    #
    else
    {
        if ($CF{"DTLOGGING"} && defined ($sref->{"_op_status"}) &&
               $sref->{"_op_status"} == $STAT_FAIL)
        {
            write_dtlog ($sref, $group, $service);
        }

        my $old_status = $sref->{"_op_status"};
        set_op_status ($group, $service, $STAT_OK);

        if ($type eq "t")
        {
            $sref->{"_last_uptrap"} = $tmnow;
        }

        #
        # if this service has just come back up and
        # we are paying attention to this event,
        # let someone know
        #
        if (($sref->{"redistribute"} ne '') ||
            ((defined ($sref->{"_op_status"})) &&
             ($old_status == $STAT_FAIL) &&
             (defined($sref->{"_upalert"})) &&
             (!defined($sref->{"upalertafter"})
              || (($tmnow - $sref->{"_first_failure"}) >= $sref->{"upalertafter"}))))
        {
            # Save the last failing monitor's output for posterity
            $sref->{"_upalertoutput"}= $sref->{"_last_output"};
            do_alert ($group, $service, $sref->{"_upalertoutput"}, 0, $FL_UPALERT);
        }

        #
        # send also when no upalertafter set
        # cabo: Modified to always send
        #
        #elsif (defined($sref->{"_upalert"}) && $old_status == $STAT_FAIL)
        elsif (defined($sref->{"_upalert"}) && ($old_status == $STAT_FAIL || $old_status == $STAT_UNTESTED))
        {
            do_alert ($group, $service, $sref->{"_upalertoutput"}, 0, $FL_UPALERT);
        }

        $sref->{"_ack"} = 0;
        $sref->{"_ack_comment"} = '';
        $sref->{"_first_failure"} = 0;
        $sref->{"_last_failure"} = 0;
        $sref->{"_consec_failures"} = 0;
        $sref->{"_failure_output"} = "";
        $sref->{"_last_summary"} = $summary;
        $sref->{"_last_detail"} = $detail;

        #
        # reset the alertevery timer
        #
        foreach my $period (keys %{$sref->{"periods"}})
        {
            #
            # "alertevery strict" should not reset _last_alert
            #
            if (!$sref->{"periods"}->{$period}->{"_alertevery_strict"})
            {
                $sref->{"periods"}->{$period}->{"_last_alert"} = 0;
            }

            $sref->{"periods"}->{$period}->{"_1stfailtime"} = 0;
            $sref->{"periods"}->{$period}->{"_alert_sent"} = 0;
        }

        #
        # change interval back to original.  _old_interval is only set
        # while the failure interval is in force, so test whether it is
        # defined ("!= undef" is a numeric comparison with zero)
        #
        if (defined ($sref->{"failure_interval"}) &&
                defined ($sref->{"_old_interval"}))
        {
            $sref->{"interval"} = $sref->{"_old_interval"};
            $sref->{"_old_interval"} = undef;
            $sref->{"_next_check"} = 0;
        }

        $sref->{"_last_success"} = $tmnow;
    }

    #
    # save the output
    #
    $sref->{"_last_output"} = $output;
    $sref->{"_last_summary"} = $summary;
    $sref->{"_last_detail"} = $detail;
}
3431

    
3432

    
3433
#
3434
# collect output from running processes
3435
#
3436
sub collect_output {
    #
    # Poll, without blocking, the file handles of the running monitors
    # and append whatever can be read to the matching entry of %ibufs.
    # A handle that reports end of file or a read error is removed from
    # $fdset_rbits so that it is not polled again.
    #
    my ($buf, $rout);

    return if (!keys %running);

    my $nfound = select ($rout=$fdset_rbits, undef, undef, 0);

    #
    # $! is only meaningful when select itself failed, so look at it
    # only in that case (a stale EINTR from an earlier call must not
    # stop us from reading).  After a failure $rout cannot be trusted.
    #
    if ($nfound < 0) {
        if ($! != &EINTR) {
            syslog ('err', "select failed while collecting monitor output: $!");
        }
        return;
    }

    debug (1, "select returned $nfound file handles\n");

    return if (!$nfound);

    #
    # look for the file descriptors that are readable,
    # and try to read as much as possible from them
    #
    foreach my $k (keys %fhandles) {
        my $fh = $fhandles{$k};
        next if (vec ($rout, fileno($fh), 1) != 1);

        my $z;
        while ($z = sysread ($fh, $buf, 8192)) {
            $ibufs{$k} .= $buf;
            debug (1, "[$buf] from $fh\n");
        }

        if (!defined ($z)) {
            #
            # ignore if EAGAIN, since we're nonblocking
            #
            next if ($! == &EAGAIN);

            #
            # error on this descriptor; keep the error text before
            # debug() has a chance to change $!
            #
            my $err = "$!";
            debug (1, "error on $fh: $err\n");
            syslog ('err', '%s', "error on $fh: $err");
            vec($fdset_rbits, fileno($fh), 1) = 0;
        } else {
            #
            # sysread returned 0, which always means EOF: stop trying
            # to get input from this file descriptor.  $! is not
            # consulted here because a successful read does not set it.
            #
            debug (1, "EOF on $fh\n");
            vec($fdset_rbits, fileno($fh), 1) = 0;
        }
    }
}
3488

    
3489

    
3490

    
3491

    
3492
#
3493
# handle forking a monitor process, and set up variables
3494
#
3495
sub run_monitor {
    my ($group, $service) = @_;

    #
    # Start the monitor program configured for $group/$service as a
    # child process whose output is read through a pipe, register it in
    # the bookkeeping tables (%fhandles, %running, %runningpid, %ibufs,
    # $fdset_rbits) and schedule the next check.
    #
    # Returns undef when the monitor program cannot be found and 0 when
    # the fork fails.
    #
    my $sref = \%{$watch{$group}->{$service}};
    my $id = "$group/$service";

    my ($monitor, $monitorargs) = ($sref->{"monitor"} =~ /^(\S+)(\s+(.*))?$/);

    if (!defined $MONITORHASH{$monitor} || ! -f $MONITORHASH{$monitor}) {
        syslog ('err', "no monitor found while trying to run [$monitor]");
        return undef;
    }

    $monitor = $MONITORHASH{$monitor};
    $monitor .= " " . $monitorargs if ($monitorargs);

    my @args;
    my @ghosts = ();

    #
    # if monitor ends with ";;", do not append groups
    # to command line
    #
    if ($monitor =~ /;;\s*$/) {
        $monitor =~ s/\s*;;\s*$//;
        @args = quotewords ('\s+', 0, $monitor);
        @ghosts = (1);

    } else {
        #
        # exclude disabled hosts
        #
        @ghosts = grep (!/^\*/, @{$groups{$group}});

        #
        # per-service excludes
        #
        if (keys %{$sref->{"exclude_hosts"}})
        {
            @ghosts = grep { !$sref->{"exclude_hosts"}->{$_} } @ghosts;
        }

        #
        # per-host dependencies
        #
        if ((defined $sref->{"depend"} && $sref->{"depend"} ne "" && $sref->{"dep_behavior"} eq 'hm')
            || (defined $sref->{"hostdepend"} && $sref->{"hostdepend"} ne ""))
        {
            my $sum = dep_summary($sref);
            my @kept = ();

            foreach my $h (@ghosts)
            {
                # keep the host only if no element of the summary
                # contains its name
                push (@kept, $h) unless (grep (/\Q$h\E/, @$sum));
            }

            @ghosts = @kept;
        }

        @args = (quotewords ('\s+', 0, $monitor), @ghosts);
    }

    if (@ghosts == 0 && !defined ($sref->{"allow_empty_group"}))
    {
        syslog ('err', "monitor for $group/$service" .
                " not called because of no host arguments\n");
        reset_timer ($group, $service);
    }

    else
    {
        $fhandles{$id} = FileHandle->new;

        # forking open: the child's STDOUT is connected to this handle
        my $pid = open ($fhandles{$id}, '-|');

        if (!defined $pid)
        {
            syslog ('err', "Could not fork: $!");
            delete $fhandles{$id};
            return 0;
        }

        elsif ($pid == 0)
        {
            # child: set up the standard streams and the environment,
            # then become the monitor program
            open(STDERR, '>&STDOUT')
                or syslog ('err', "Could not dup stderr: $!");

            open(STDIN, "</dev/null")
                or syslog ('err', "Could not connect stdin to /dev/null: $!");

            foreach my $v (keys %{$sref->{"ENV"}})
            {
                $ENV{$v} = $sref->{"ENV"}->{$v};
            }
            $ENV{"MON_GROUP"}         = $group;
            $ENV{"MON_SERVICE"}       = $service;
            $ENV{"MON_LAST_SUMMARY"}  = $sref->{"_last_summary"}  if (defined $sref->{"_last_summary"});
            $ENV{"MON_LAST_OUTPUT"}   = $sref->{"_last_output"}   if (defined $sref->{"_last_output"});
            $ENV{"MON_LAST_FAILURE"}  = $sref->{"_last_failure"}  if (defined $sref->{"_last_failure"});
            $ENV{"MON_FIRST_FAILURE"} = $sref->{"_first_failure"} if (defined $sref->{"_first_failure"});
            $ENV{"MON_DEPEND_STATUS"} = $sref->{"_depend_status"} if (defined $sref->{"_depend_status"});
            $ENV{"MON_FIRST_SUCCESS"} = $sref->{"_first_success"} if (defined $sref->{"_first_success"});
            $ENV{"MON_LAST_SUCCESS"}  = $sref->{"_last_success"}  if (defined $sref->{"_last_success"});
            $ENV{"MON_DESCRIPTION"}   = $sref->{"description"}    if (defined $sref->{"description"});
            $ENV{"MON_STATEDIR"}      = $CF{"STATEDIR"};
            $ENV{"MON_LOGDIR"}        = $CF{"LOGDIR"};
            $ENV{"MON_CFBASEDIR"}     = $CF{"CFBASEDIR"};

            if (!exec @args)
            {
                syslog ('err', "could not exec '@args': $!");
                exit (1);
            }
        }

        # parent from here on
        $sref->{"_last_check"} = scalar (time);
        $sref->{"_monitor_running"} = 1;

        debug (1, "watching file handle ", fileno ($fhandles{$id}),
            " for $group/$service\n");

        #
        # set nonblocking I/O and setup bit vector for select(2)
        #
        configure_filehandle ($fhandles{$id}) ||
            syslog ("err", "could not configure filehandle for $group/$service: $!");
        vec ($fdset_rbits, fileno($fhandles{$id}), 1) = 1;
        $fdset_ebits |= $fdset_rbits;

        #
        # note that this is running
        #
        $running{$id} = 1;
        $runningpid{$pid} = $id;
        $ibufs{$id} = "";
        $procs++;
    }

    # advance the schedule from the previous slot if there is one,
    # otherwise from the current time
    $sref->{"_next_check"} =
        ($sref->{"_next_check"} || time()) + $sref->{"interval"};
}
3656

    
3657

    
3658
#
3659
# set the countdown timer for this service
3660
#
3661
sub reset_timer {
    my ($group, $service) = @_;

    #
    # Set the countdown ("_timer") of $group/$service:
    #   - with a non-zero "randskew": the interval plus or minus a
    #     random whole number between 1 and randskew
    #   - otherwise, with a "_next_check" time: the seconds left until
    #     then, or the full interval if that moment has already passed
    #   - otherwise: the interval
    #
    my $sref = \%{$watch{$group}->{$service}};

    if ($sref->{"randskew"} != 0)
    {
        # pick the direction first, then the size of the skew
        my $sign = (int (rand (2)) == 0) ? -1 : 1;
        my $skew = int (rand ($sref->{"randskew"}) + 1);

        $sref->{"_timer"} = $sref->{"interval"} + $sign * $skew;
    }

    elsif ($sref->{"_next_check"})
    {
        my $remaining = $sref->{"_next_check"} - time();

        $sref->{"_timer"} = ($remaining < 0) ? $sref->{"interval"}
                                            : $remaining;
    }

    else
    {
        $sref->{"_timer"} = $sref->{"interval"};
    }
}
3686

    
3687

    
3688
#
3689
# randomize the delay before each test
3690
# $opt{"randstart"} is seconds
3691
#
3692
sub randomize_startdelay {
    #
    # Give every service of every watch a random initial countdown:
    # a whole number of seconds below $CF{"RANDSTART"}.
    #
    for my $grp (keys %watch) {
        for my $svc (keys %{$watch{$grp}}) {
            my $delay = int (rand ($CF{"RANDSTART"}));
            $watch{$grp}->{$svc}->{"_timer"} = $delay;
        }
    }
}
3703

    
3704

    
3705
#
3706
# return 1 if $val is within $range,
3707
# where $range = "number" or "number-number"
3708
#
3709
sub inRange {
    my ($val, $range) = @_;

    #
    # Tell whether $val lies within $range.
    #
    #   $val    number to test
    #   $range  either "number" or "number-number" (whitespace around
    #           the dash is allowed; both ends are inclusive)
    #
    # Returns 1 on a match and 0 otherwise, including when $range has
    # neither of the two forms.
    #

    # a single number must be matched exactly
    return 1 if ($range =~ /^(\d+)$/ && $val == $1);

    # a low-high pair
    return 1 if ($range =~ /^(\d+)\s*-\s*(\d+)$/ &&
            ($val >= $1 && $val <= $2));

    return 0;
}
3724

    
3725

    
3726
#
3727
# disable ($cmd==0) or enable a watch
3728
#
3729
sub disen_watch {
    my ($w, $cmd) = @_;

    #
    # Disable (false $cmd) or enable (true $cmd) the watch named $w by
    # recording 1 or 0 in %watch_disabled.  Returns undef when no such
    # watch is defined, otherwise the value that was recorded.
    #
    return undef unless (defined ($watch{$w}));

    return $watch_disabled{$w} = ($cmd ? 0 : 1);
}
3739

    
3740

    
3741
#
3742
# disable ($cmd==0) or enable a service
3743
#
3744
sub disen_service {
    my ($g, $s, $cmd) = @_;

    #
    # Disable (false $cmd) or enable (true $cmd) service $s of watch
    # group $g by storing 1 or 0 in its "disable" field.  Returns undef
    # when the group or the service is not defined, otherwise the value
    # that was stored.
    #
    return undef unless (defined $watch{$g});
    return undef unless (defined $watch{$g}->{$s});

    return $watch{$g}->{$s}->{"disable"} = ($cmd ? 0 : 1);
}
3756

    
3757

    
3758
#
3759
# disable ($cmd==0) or enable a host
3760
#
3761
sub disen_host {
    my ($h, $cmd) = @_;

    #
    # Disable or enable host $h in every group that lists it.
    #
    #   $h    host name, compared literally with the group members
    #   $cmd  undef or 0 disables the host, anything else enables it
    #
    # A disabled host is stored with a leading "*" in @{$groups{...}}.
    # Returns 1 if at least one group member was changed, undef
    # otherwise.
    #
    my $disable = (!defined $cmd) || $cmd == 0;

    # what to look for, and what to put in its place
    my ($from, $to) = $disable ? ($h, "*$h") : ("*$h", $h);

    my $found = undef;

    foreach my $g (keys %groups) {
        # $member aliases the array element, so assigning to it updates
        # the group in place.  Plain string comparison is used instead
        # of a pattern so that "." and other regex metacharacters in a
        # host name only ever match themselves.
        foreach my $member (@{$groups{$g}}) {
            next if ($member ne $from);
            $member = $to;
            $found = 1;
        }
    }

    $found;
}
3784

    
3785

    
3786
sub host_exists {
    my $host = shift;

    #
    # Return 1 if $host is a member of any group in %groups, 0 if not.
    # Members are compared literally, so an entry carrying the "*"
    # prefix is not considered equal to the bare host name.
    #
    foreach my $g (keys %groups) {
        # string equality instead of a pattern match: a "." in the
        # name must not match arbitrary characters, and a name holding
        # regex metacharacters must not abort the program
        foreach my $member (@{$groups{$g}}) {
            return 1 if ($member eq $host);
        }
    }

    return 0;
}
3801

    
3802

    
3803

    
3804
#
3805
# given a host, search groups and return an array of group
3806
# names which have that host as their only member. return
3807
# an empty array if no group found
3808
# 
3809
#
3810
sub host_singleton_group {
    my $host = shift;

    #
    # Return the names of all groups whose only member is $host, with
    # or without the leading "*".  Returns an empty list when there is
    # no such group.
    #
    my @found;

    foreach my $g (keys %groups) {
        my @members = @{$groups{$g}};
        next if (scalar (@members) != 1);

        # literal comparison: the host name is data, not a pattern
        if ($members[0] eq $host || $members[0] eq "*$host")
        {
            push (@found, $g);
        }
    }

    return (@found);
}
3825

    
3826

    
3827
#
3828
# save state
3829
#
3830
sub save_state {
    my (@states) = @_;

    #
    # Write the requested kinds of state into $CF{STATEDIR}.
    #
    #   @states  any of "disabled", "opstatus" or "all"
    #
    # "disabled" writes one "disable host|watch|service ..." line per
    # disabled item to the file "disabled".  "opstatus" writes one
    # tab-separated line of name=value fields per service to the file
    # "opstatus".  Problems are reported through syslog; when a file
    # cannot be opened the rest of that state entry is skipped.
    #
    foreach my $state (@states) {
        if ($state eq "disabled" || $state eq "all") {
            my $fh;
            if (!open ($fh, '>', "$CF{STATEDIR}/disabled")) {
                syslog ("err", "could not write to state file: $!");
                next;
            }

            # hosts: disabled members carry a leading "*", which is
            # stripped in the output only (the group is left untouched)
            foreach my $group (keys %groups) {
                foreach my $member (@{$groups{$group}}) {
                    next if ($member !~ /^\*/);
                    print {$fh} "disable host " . substr ($member, 1) . "\n";
                }
            }

            foreach my $group (keys %watch) {
                if (exists $watch_disabled{$group} && $watch_disabled{$group} == 1) {
                    print {$fh} "disable watch $group\n";
                }
                foreach my $service (keys %{$watch{$group}}) {
                    if (defined $watch{$group}->{$service}->{'disable'}
                        && $watch{$group}->{$service}->{'disable'} == 1) {
                        print {$fh} "disable service $group $service\n";
                    }
                }
            }

            # buffered write errors only show up when the file is closed
            close ($fh)
                or syslog ("err", "could not finish writing state file: $!");
        }

        if ($state eq "opstatus" || $state eq "all") {
            my $fh;
            if (!open ($fh, '>', "$CF{STATEDIR}/opstatus")) {
                syslog ("err", "could not write to opstatus state file: $!");
                next;
            }

            foreach my $group (keys %watch) {
                foreach my $service (keys %{$watch{$group}}) {
                    my $sref = \%{$watch{$group}->{$service}};

                    print {$fh} "group=$group\tservice=$service";

                    # per-service variables, stored under "_name"
                    foreach my $var (qw(op_status failure_count alert_count last_success first_success
                                        consec_failures last_failure first_failure last_summary
                                        last_failure_time last_failure_summary last_failure_detail
                                        last_detail ack ack_comment last_trap last_traphost exitval
                                        last_check last_op_status failure_output trap_timer)) {
                        print {$fh} "\t$var=" . esc_str($sref->{"_$var"});
                    }

                    # per-period variables, written as "label:name"
                    foreach my $periodlabel (keys %{$sref->{periods}}) {
                        foreach my $var (qw(last_alert alert_sent 1stfailtime failcount)) {
                            print {$fh} "\t$periodlabel:$var=" . esc_str($sref->{periods}{$periodlabel}{"_$var"});
                        }
                    }

                    print {$fh} "\n";
                }
            }

            close ($fh)
                or syslog ("err", "could not finish writing opstatus state file: $!");
        }
    }
}
3890

    
3891

    
3892
#
3893
# load state
3894
#
3895
sub load_state {
    my (@states) = @_;

    #
    # Read previously saved state back from $CF{STATEDIR}.
    #
    #   @states  any of "disabled", "opstatus" or "all"
    #
    # "disabled" replays the "disable host|watch|service" lines of the
    # file "disabled".  "opstatus" reads the file "opstatus" and copies
    # each name=value field into the matching service (or, for names of
    # the form "label:name", into the matching period) of %watch.
    # Lines for groups, services or periods that no longer exist are
    # ignored.  When a file cannot be opened the rest of that state
    # entry is skipped.
    #
    foreach my $state (@states) {
        if ($state eq "disabled" || $state eq "all") {
            my $fh;
            if (!open ($fh, '<', "$CF{STATEDIR}/disabled")) {
                syslog ("err", "could not read state file: $!");
                next;
            }

            while (defined (my $line = <$fh>)) {
                chomp $line;
                my ($cmd, $what, $args) = split (/\s+/, $line, 3);

                next if ($cmd ne "disable");

                if ($what eq "host") {
                    disen_host ($args);
                } elsif ($what eq "watch") {
                    syslog ("err", "undefined watch reading state file: $line")
                        if (!defined disen_watch ($args));
                } elsif ($what eq "service") {
                    my ($group, $service) = split (/\s+/, $args, 2);
                    syslog ("err",
                        "undefined group or service reading state file: $line")
                        if (!defined disen_service ($group, $service));
                }
            }

            syslog ("info", "state '$state' loaded");
            close ($fh);
        }

        if ($state eq "opstatus" || $state eq "all") {
            my $fh;
            if (!open ($fh, '<', "$CF{STATEDIR}/opstatus")) {
                syslog ("err", "could not read state file: $!");
                next;
            }

            while (defined (my $line = <$fh>)) {
                chomp $line;

                #
                # Split every field at its FIRST "=": the name never
                # contains one but the value may.  Fields without any
                # "=" are skipped rather than picking up the captures
                # of an earlier match.
                #
                my %opstatus;
                foreach my $field (split (/\t/, $line)) {
                    my ($name, $value) = split (/=/, $field, 2);
                    next if (!defined $value);
                    $opstatus{$name} = $value;
                }

                next unless (exists $opstatus{group} && exists $watch{$opstatus{group}}
                             && exists $opstatus{service} && exists $watch{$opstatus{group}}->{$opstatus{service}});

                my $sref = $watch{$opstatus{group}}->{$opstatus{service}};

                foreach my $op (keys %opstatus) {
                    next if ($op eq 'group' || $op eq 'service');

                    if ($op =~ /^(.*):(.*)$/) {
                        # "periodlabel:variable"
                        my ($label, $var) = ($1, $2);
                        next unless exists $sref->{periods}{$label};
                        $sref->{periods}{$label}{"_$var"} = un_esc_str($opstatus{$op});
                    } else {
                        $sref->{"_$op"} = un_esc_str($opstatus{$op});
                    }
                }
            }

            syslog ("info", "state '$state' loaded");
            close ($fh);
        }
    }
}
3956

    
3957

    
3958
#
3959
# authenticate a login
3960
#
3961
sub auth {
    my ($type, $user, $plaintext, $host) = @_;

    #
    # Authenticate a login.
    #
    #   $type       "getpwnam", "shadow", "userfile", "pam" or
    #               "trustlocal"
    #   $user       login name; must not be empty
    #   $plaintext  password as supplied by the client; must not be
    #               empty except for "trustlocal"
    #   $host       address of the client, only looked at by
    #               "trustlocal"
    #
    # Returns 1 when the login is accepted and undef otherwise.  The
    # "shadow" type has no implementation here and is always refused.
    #

    if ($user eq "" || ($type ne 'trustlocal' && $plaintext eq "")) {
        syslog ('err', "an undef username or password supplied");
        return undef;
    }

    #
    # standard UNIX passwd
    #
    if ($type eq "getpwnam") {
        my (undef, $pass) = getpwnam($user);
        return undef
            if (!defined $pass);

        # crypt() may return undef when it cannot use the stored value
        # as a salt; that must never count as a match
        my $crypted = crypt ($plaintext, $pass);
        return undef if (!defined $crypted || $crypted ne $pass);
        return 1;

    #
    # shadow password
    #
    } elsif ($type eq "shadow") {
        # nothing to do here: falls through to the final "return undef"

    #
    # "mon" authentication
    #
    } elsif ($type eq "userfile") {
        my $fh;
        if (!open ($fh, '<', $CF{"USERFILE"})) {
            syslog ('err', "could not open user file '$CF{USERFILE}': $!");
            return undef;
        }

        # "name : crypted-password" lines; the whole file is read so
        # that a later entry for the same name replaces an earlier one
        my %hash_of;
        while (defined (my $line = <$fh>)) {
            next if ($line =~ /^\s*#/ || $line =~ /^\s*$/);
            chomp $line;
            my ($name, $hash) = split (/\s*:\s*/, $line, 2);
            $hash_of{$name} = $hash;
        }
        close ($fh);

        return undef if (!defined($hash_of{$user}));  #user was not found in userfile

        my $crypted = crypt ($plaintext, $hash_of{$user});
        return undef if (!defined $crypted || $crypted ne $hash_of{$user}); #user gave wrong password
        return 1;

    #
    # PAM authentication
    #
    } elsif ($type eq "pam") {
        # the credentials are handed over in these two globals while
        # the PAM conversation runs
        local $PAM_username = $user;
        local $PAM_password = $plaintext;
        my $pamh;
        if (!ref($pamh = Authen::PAM->new($CF{'PAMSERVICE'}, $PAM_username, \&pam_conv_func))) {
            syslog ('err', "Error code $pamh during PAM init!: $!");
            return undef;
        }
        my $res = $pamh->pam_authenticate;
        return undef if ($res != Authen::PAM::PAM_SUCCESS());
        return 1;

    } elsif ($type eq "trustlocal") {
        # We're configured to trust all authentications from localhost
        # i.e. cgi scripts are handling authentication themselves
        return undef if ($host ne "127.0.0.1");
        return 1;

    } else {
        syslog ('err', "authentication type '$type' not known");
    }

    return undef;
}
4033

    
4034

    
4035
#
4036
# load the table of who can do which commands
4037
#
4038
sub load_auth {
    my ($startup) = @_;

    #
    # (Re)build the authorization tables from the file $CF{"AUTHFILE"}.
    #
    #   $startup  passed on to err_startup() when the file cannot be
    #             opened or a command line cannot be parsed
    #
    # %AUTHCMDS, %NOAUTHCMDS and %AUTHTRAPS are emptied first.  The file
    # starts in the command section; the lines "command section" and
    # "trap section" switch sections.
    #
    #   command section:  "cmd: user, user, ..." where "!user" denies
    #                     the command to that user
    #   trap section:     "host user password" where host is "*", a
    #                     dotted quad or a host name
    #
    # Returns undef if the file cannot be opened, otherwise the result
    # of closing it.
    #
    %AUTHCMDS = ();
    %NOAUTHCMDS = ();
    %AUTHTRAPS = ();
    my $sect = "command";

    my $fh;
    if (!open ($fh, '<', $CF{"AUTHFILE"})) {
        err_startup ($startup, "could not open $CF{AUTHFILE}: $!");
        return undef;
    }

    while (defined (my $line = <$fh>)) {
        next if ($line =~ /^\s*#/ || $line =~ /^\s*$/);
        chomp $line;
        $line =~ s/^\s*//;
        $line =~ s/\s*$//;

        if ($line =~ /^command\s+section/) {
            $sect = "command";
            next;
        } elsif ($line =~ /^trap\s+section/) {
            $sect = "trap";
            next;
        }

        if ($sect eq "command") {
            my ($cmd, $users) = split (/\s*:\s*/, $line, 2);
            if (!defined $users) {
                err_startup ($startup, "could not parse line $. of auth file\n");
                next;
            }

            foreach my $u (split (/\s*,\s*/, $users)) {
                if ($u =~ /^!(.*)/) {
                    # Directive is to "deny-user"
                    $NOAUTHCMDS{"\L$cmd"}{$1} = 1;
                } else {
                    # Directive is to "allow-user"; this also covers
                    # the special name AUTH_ANY (all authenticated
                    # users), which is stored like any other name
                    $AUTHCMDS{"\L$cmd"}{$u} = 1;
                }
            }

        } elsif ($sect eq "trap") {
            if ($line !~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
                syslog ('err', "invalid entry in trap sect of $CF{AUTHFILE}, line $.");
                next;
            }
            my ($host, $user, $password) = ($1, $2, $3);

            #
            # "*" allows traps from all hosts; anything else must look
            # like a dotted quad or a host name and must resolve, and is
            # then stored as a dotted quad
            #
            if ($host ne "*") {
                if ($host !~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ &&
                        $host !~ /^[A-Z\d][A-Z\.\d\-]*[A-Z\d]+$/i) {
                    syslog ('err', "invalid host in $CF{AUTHFILE}, line $.");
                    next;
                }

                # inet_aton returns undef when the name cannot be
                # converted or resolved
                my $packed = inet_aton ($host);
                if (!defined $packed || $packed eq "") {
                    syslog ('err', "invalid host in $CF{AUTHFILE}, line $.");
                    next;
                }

                $host = inet_ntoa ($packed);
            }

            # the password is deliberately left out of the log
            syslog ('notice', "Adding trap auth of: $host $user");
            $AUTHTRAPS{$host}{$user} = $password;

        } else {
            syslog ('err', "unknown section in $CF{AUTHFILE}: $line");
        }
    }

    return close ($fh);
}
4126

    
4127
# Empty stub: the body contains no statements.
sub load_view_users {}
4128

    
4129
sub view_match {
    my ($view, $group, $service) = @_;

    #
    # Decide whether $group / $service is visible in $view.
    # Returns 1 when no view is given, when the view has an entry for
    # the group, or when it has an entry for "group:service"; returns
    # 0 otherwise.
    #

    # no view in use: everything matches
    return 1 unless (defined($view));

    # the view contains the whole group
    return 1 if (defined($group) && defined($views{$view}->{$group}));

    # the view contains this particular group:service pair
    return defined($views{$view}->{$group.":".$service}) ? 1 : 0;
}
4146

    
4147
#
4148
# return undef if $user isn't permitted to perform $cmd
4149
# Optional third argument controls logging to syslog.
4150
# e.g.,
4151
#  check_auth("joe", "disable")
4152
#   will check to see if user joe is authorized to disable, and
4153
#   complain to syslog if joe is not authorized
4154
#  check_auth("joe", "disable", 1)
4155
#   will check to see if user joe is authorized to disable but 
4156
#   NOT complain to syslog if joe is not authorized
4157
#
4158
sub check_auth {
    my ($user, $cmd, $no_syslog) = @_;

    #
    # Decide whether $user may run $cmd.
    #
    #   $user       authenticated user name, or undef
    #   $cmd        command name, used as the key into %AUTHCMDS and
    #               %NOAUTHCMDS
    #   $no_syslog  when defined, a "not authenticated" refusal is not
    #               written to syslog
    #
    # Returns 1 if permitted and undef if not.
    #
    my $have_user = defined ($user);

    #
    # An explicit denial, for this user or for AUTH_ANY, wins over
    # everything else and is always logged.
    #
    if ($have_user &&
            ($NOAUTHCMDS{$cmd}{$user} || $NOAUTHCMDS{$cmd}{"AUTH_ANY"}))
    {
        syslog ("err", "user '$user' tried '$cmd', denied");
        return undef;
    }

    #
    # "all" lets any client, authenticated or not, run the command.
    #
    if ($AUTHCMDS{$cmd}{"all"}) {
        return 1;
    }

    #
    # An authenticated user passes if the command is open to AUTH_ANY
    # or to this user by name.
    #
    if ($have_user) {
        return 1 if ($AUTHCMDS{$cmd}{"AUTH_ANY"} || $AUTHCMDS{$cmd}{$user});
    }

    if (!defined($no_syslog)) {
        syslog ("err", "user '$user' tried '$cmd', not authenticated");
    }

    return undef;
}
4196

    
4197

    
4198
#
4199
# reload things
4200
#
4201
sub reload {
    my (@what) = @_;

    #
    # Reload the named items, in order.  The only item understood is
    # "auth", which calls load_auth with no arguments.  Returns undef
    # as soon as an unknown item is met and 1 once every item has been
    # handled.
    #
    foreach my $item (@what) {
        return undef unless ($item eq "auth");
        load_auth();
    }

    return 1;
}
4214

    
4215

    
4216
sub err_startup {
    my ($startup, $msg) = @_;

    #
    # Report an error: die with $msg when $startup is true, otherwise
    # write $msg to syslog at priority "err" and return.
    #
    if ($startup) {
        die "$msg\n";
    } else {
        # $msg is data, not a format string, so a "%" in it must not
        # be expanded by syslog
        syslog ('err', '%s', $msg);
    }
}
4225

    
4226

    
4227
#
4228
# handle a trap
4229
#
4230
sub handle_trap {
    # Validate, authenticate and dispatch one trap datagram.
    #
    #   $buf   raw trap text, one "tag=value" pair per line; a value may be
    #          wrapped in single quotes and is decoded with un_esc_str
    #   $from  packed sockaddr_in of the sender, as returned by recv
    #
    # A rejected trap is reported through syslog and the sub returns undef
    # (or an empty return for an undefined group/service). An accepted trap
    # resets the service's trap timers and is passed to process_event as a
    # "t" event.
    my ($buf, $from) = @_;

    my $time = time;
    my %trap = ();
    my $intended;
    my $fromip;

#
# MON-specific tags
# pro	protocol
# aut	auth
# usr	username
# pas	password
# typ	type  ("failure", "up", "startup", "trap", "traptimeout")
# spc	specific type (STAT_OK, etc.) THIS IS NO LONGER USED
# seq	sequence
# grp	group
# svc	service
# hst	host
# sta	status (same as exit status of a monitor)
# tsp	timestamp as time(2) value
# sum	summary output
# dtl	detail
#

    #
    # this part validates the trap
    #
    {
        foreach my $line (split (/\n/, $buf))
        {
            if ($line =~ /^(\w+)=(.*)/)
            {
                my $trap_name = $1;
                my $trap_val = $2;
                chomp $trap_val;
                # strip one pair of enclosing single quotes
                $trap_val =~ s/^\'(.*)\'$/$1/;
                $trap{$trap_name} = un_esc_str ($trap_val);
            }

            else
            {
                syslog ('err', "unspecified tag in trap: $line");
            }
        }

        # the summary always ends with a newline
        $trap{"sum"} = "$trap{sum}\n" if ($trap{"sum"} !~ /\n$/);

        my ($port, $addr) = sockaddr_in ($from);
        $fromip = inet_ntoa ($addr);

        #
        # trap authentication
        #
        my ($trapuser, $trappass);

        # a "*" host entry applies to every sender
        my $traphost = defined ($AUTHTRAPS{"*"}) ? "*" : $fromip;

        #
        # The host check must come before any look at
        # $AUTHTRAPS{$traphost}{...}: a nested lookup would autovivify the
        # host entry, making this test always pass and adding one hash
        # entry per unknown sender.
        #
        if (!defined ($AUTHTRAPS{$traphost}))
        {
            syslog ('err', "received trap from unauthorized host: $fromip");
            return undef;
        }

        if (defined ($AUTHTRAPS{$traphost}{"*"}))
        {
            # a "*" user entry accepts any user without a password
            $trapuser = "*";
            $trappass = "";
        }

        else
        {
            $trapuser = $trap{"usr"};
            $trappass = $trap{"pas"};
        }

        if ($trapuser ne "*") {
            if (!defined $AUTHTRAPS{$traphost}{$trapuser} ||
                crypt ($trappass, $AUTHTRAPS{$traphost}{$trapuser}) ne
                $AUTHTRAPS{$traphost}{$trapuser})
            {
                syslog ('err', "received trap from unauthorized user $trapuser, host $traphost");
                return undef;
            }
        }

        #
        # protocol version
        #
        if ($trap{"pro"} < $TRAP_PRO_VERSION)
        {
            syslog ('err', "cannot handle traps from version less than $TRAP_PRO_VERSION");
            return undef;
        }

        #
        # validate trap type
        #
        if (!defined $trap{"sta"})
        {
            syslog ('err', "no trap sta value specified from $fromip");
            return undef;
        }

        #
        # if mon receives a trap for an unknown group/service, then the
        # default/default group/service should catch these if it is defined
        #
        if (!defined $watch{$trap{"grp"}} && defined $watch{"default"})
        {
            $intended = "$trap{'grp'}:$trap{'svc'}";
            $trap{"grp"} = "default";
        }

        # the leading defined() test keeps the nested lookups from
        # creating an empty $watch{default}
        if ($trap{"grp"} eq 'default'
            && defined ($watch{"default"})
            && !defined ($watch{"default"}->{$trap{"svc"}})
            && defined ($watch{"default"}->{"default"}))
        {
            $trap{"svc"} = "default";
        }

        if (!defined ($groups{$trap{"grp"}}))
        {
            syslog ('err', "trap received for undefined group $trap{grp}");
            return;
        }

        # test the group level first so that a group without any watch is
        # not created as a side effect of the service lookup
        elsif (!defined $watch{$trap{"grp"}}
            || !defined $watch{$trap{"grp"}}->{$trap{"svc"}})
        {
            syslog ('err', "trap received for undefined service type $trap{grp}/$trap{svc}");
            return;
        }
    }

    #
    # trap has been validated, proceed
    #
    my $sref = \%{$watch{$trap{"grp"}}->{$trap{"svc"}}};

    #
    # a trap received resets the trap timeout timer
    #
    if (exists $sref->{"traptimeout"})
    {
        $sref->{"_trap_timer"} = $sref->{"traptimeout"};
    }

    $sref->{"_last_trap"} = $time;

    # remember the group:service the sender asked for when the trap was
    # rerouted to the default watch
    if ($intended)
    {
        $sref->{"_intended"} = $intended;
    }

    syslog ('info', "trap $trap{typ} $trap{spc} from " .
            "$fromip grp=$trap{grp} svc=$trap{svc}, sta=$trap{sta}\n");

    $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
        if ($sref->{"trapduration"});

    process_event ("t", $trap{"grp"}, $trap{"svc"}, $trap{"sta"}, "$trap{sum}\n$trap{dtl}");

    # _intended is only meaningful while this trap is being processed
    if (defined ($sref->{"_intended"}))
    {
        undef ($sref->{"_intended"});
    }
}
4409

    
4410

    
4411
#
4412
# trap timeout
4413
#
4414
sub handle_trap_timeout {
    # Called when no trap arrived for $group/$service within its
    # "traptimeout" period: re-arm the trap timer and raise a "T" event
    # with status 1 whose text records the timeout and the current
    # local time.
    my ($group, $service) = @_;

    my $sref  = \%{$watch{$group}->{$service}};
    my $limit = $sref->{"traptimeout"};
    my $stamp = localtime (time);

    $sref->{"_trap_timer"} = $limit;

    my $output = "trap timeout\n"
        . "trap timeout after " . $limit . "s at " . $stamp . "\n";

    process_event ("T", $group, $service, 1, $output);
}
4426

    
4427

    
4428
#
4429
# write to a socket
4430
#
4431
sub sock_write {
    # Write all of $buf to the handle $sock, looping over partial writes.
    #
    # A write interrupted by a signal (EINTR) is retried immediately; a
    # write that would block (EAGAIN/EWOULDBLOCK) is retried after a
    # 100 ms pause. Any other error aborts the write.
    #
    # Returns 1 once the whole buffer has been written (also for an empty
    # buffer), undef on a write error with $! left set by syswrite.
    my ($sock, $buf) = @_;

    my $nleft = length ($buf);

    while ($nleft) {
        my $nwritten = syswrite ($sock, $buf, $nleft);

        if (!defined ($nwritten)) {
            # interrupted before anything was written: just try again
            next if ($! == EINTR);

            return undef if ($! != EAGAIN && $! != EWOULDBLOCK);

            usleep (100000);
            next;
        }

        $nleft -= $nwritten;
        substr ($buf, 0, $nwritten) = "";
    }

    return 1;
}
4447

    
4448

    
4449
#
4450
# do I/O processing for traps and client connections
4451
#
4452
sub handle_io {
    # Service network I/O for at most about $SLEEPINT seconds: receive
    # traps on TRAPSERVER, accept connections on SERVER, read pending
    # data from connected clients and run their queued commands, then
    # close clients that have been idle for CLIENT_TIMEOUT seconds.

    #
    # build iovec for server connections, traps, and clients
    #
    $iovec = '';
    my $niovec = '';
    vec ($iovec, fileno (TRAPSERVER), 1) = 1;
    vec ($iovec, fileno (SERVER), 1) = 1;
    foreach my $cl (keys %clients) {
        vec ($iovec, $cl, 1) = 1;
    }

    #
    # handle client I/O while there is some to handle
    #
    my $sleep = $SLEEPINT;
    my $tm0 = [gettimeofday];
    my $n;
    my $select_error;

    # select yields 0 on timeout (which ends the loop), -1 on failure and
    # the number of ready descriptors otherwise
    while ($n = select ($niovec = $iovec, undef, undef, $sleep)) {
        my $tm1 = [gettimeofday];

        if ($n < 0)
        {
            #
            # $! is only meaningful when select itself failed. An
            # interrupted call is simply retried below; anything else
            # is reported once the loop has been left.
            #
            if ($! != EINTR ()) {
                $select_error = "$!";
                last;
            }
        }

        else
        {
            #
            # mon trap
            #
            if (vec ($niovec, fileno (TRAPSERVER), 1)) {
                my ($from, $trapbuf);
                if (!defined ($from = recv (TRAPSERVER, $trapbuf, 65536, 0))) {
                    syslog ('err', "error trying to recv a trap: $!");
                } else {
                    handle_trap ($trapbuf, $from);
                }
                next;

            #
            # client connections
            #
            } elsif (vec ($niovec, fileno (SERVER), 1)) {
                client_accept ();
            }

            #
            # read data from clients if any exists
            #
            if ($numclients) {
                foreach my $cl (keys %clients) {
                    next if (!vec ($niovec, $cl, 1));

                    my $buf = '';
                    # kept separate from $n so that select's result is
                    # not overwritten
                    my $nread = sysread ($clients{$cl}->{"fhandle"}, $buf, 8192);

                    if (!defined $nread) {
                        # nothing to read after all, or interrupted:
                        # leave the client alone and try again later
                        next if ($! == EAGAIN () || $! == EINTR ());
                        client_close ($cl, "read error: $!");
                    } elsif ($nread == 0) {
                        # end of file, the peer closed the connection
                        client_close ($cl);
                    } else {
                        $clients{$cl}->{"buf"} .= $buf;
                        $clients{$cl}->{"timeout"} = $CF{"CLIENT_TIMEOUT"};
                        $clients{$cl}->{"last_read"} = time;
                    }
                }
            }
        }

        #
        # execute client commands which have been read
        #
        client_dopending () if ($numclients);

        last if (tv_interval ($tm0, $tm1) >= $SLEEPINT);

        # only wait for what is left of this interval
        $sleep = $SLEEPINT - tv_interval ($tm0, $tm1);
    }

    if (defined ($select_error)) {
        syslog ('err', "select returned an error for I/O loop: $select_error");
    }

    #
    # count down client inactivity timeouts and close expired connections
    #
    if ($numclients) {
        foreach my $cl (keys %clients) {
            my $timenow = time;
            $clients{$cl}->{"timeout"} = $timenow - $clients{$cl}->{"last_read"};

            if ($clients{$cl}->{"timeout"} >= $CF{"CLIENT_TIMEOUT"}) {
                client_close ($cl, "timeout after $CF{CLIENT_TIMEOUT}s");
            }
        }
    }
}
4545

    
4546

    
4547
#
4548
# generate alert and monitor path hashes
4549
#
4550
sub gen_scriptdir_hash {
    # Rebuild %MONITORHASH and %ALERTHASH, which map the name of every
    # monitor and alert referenced in %watch to the full path of the
    # first executable of that name found in the SCRIPTDIR and ALERTDIR
    # search paths (colon-separated lists). Unusable directories and
    # programs that cannot be found are reported through syslog.
    my (@scriptdirs, @alertdirs);

    %MONITORHASH = ();
    %ALERTHASH = ();

    foreach my $d (split (/\s*:\s*/, $CF{"SCRIPTDIR"})) {
        if (-d $d && -x $d) {
            push (@scriptdirs, $d);
        } else {
            syslog ('err', "scriptdir $d is not usable");
        }
    }

    foreach my $d (split (/\s*:\s*/, $CF{"ALERTDIR"})) {
        if (-d $d && -x $d) {
            push (@alertdirs, $d);
        } else {
            syslog ('err', "alertdir $d is not usable");
        }
    }

    #
    # monitors
    #
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            next if (!defined $watch{$group}->{$service}->{"monitor"});

            # the first word of the monitor line is the program name
            my $monitor = (split (/\s+/, $watch{$group}->{$service}->{"monitor"}))[0];
            my $found = 0;

            foreach my $dir (@scriptdirs) {
                next unless (-x "$dir/$monitor");
                $MONITORHASH{$monitor} = "$dir/$monitor"
                    unless (defined $MONITORHASH{$monitor});
                $found++;
                last;
            }

            if (!$found) {
                syslog ('err', "$monitor not found in one of (\@scriptdirs[@scriptdirs])");
            }
        }
    }

    #
    # alerts
    #

    # record the path of one alert program, earliest directory first
    my $register_alert = sub {
        my ($alert) = @_;
        my $found = 0;

        foreach my $dir (@alertdirs) {
            next unless (-x "$dir/$alert");
            $ALERTHASH{$alert} = "$dir/$alert"
                unless (defined $ALERTHASH{$alert});
            $found++;
            last;
        }

        if (!$found) {
            syslog ('err', "$alert not found in one of (\@alertdirs[@alertdirs])");
        }
    };

    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            my $redistribute = $watch{$group}->{$service}->{"redistribute"};
            if (defined $redistribute && $redistribute ne '') {
                $register_alert->($redistribute);
            }

            foreach my $period (keys %{$watch{$group}->{$service}->{"periods"}}) {
                my $pref = $watch{$group}->{$service}->{"periods"}->{$period};

                foreach my $my_alert (
                        @{$pref->{"alerts"}},
                        @{$pref->{"upalerts"}},
                        @{$pref->{"startupalerts"}},
                        @{$pref->{"ackalerts"}},
                        @{$pref->{"disablealerts"}},
                            ) {
                    # drop leading "name=value " settings and trailing
                    # arguments, keeping only the program name
                    my $alert = $my_alert;
                    $alert =~ s/^(\S+=\S+ )*(\S+).*$/$2/;
                    $register_alert->($alert);
                }
            }
        }
    }
}
4640

    
4641

    
4642
#
4643
# do some processing on dirs
4644
#
4645
sub normalize_paths {
    # Turn the relative paths held in %CF into absolute ones and report,
    # through syslog, those that do not exist:
    #   STATEDIR, LOGDIR           relative to BASEDIR, must be directories
    #   AUTHFILE, USERFILE         relative to CFBASEDIR, must be files
    #                              (USERFILE only for auth type "userfile")
    #   DTLOGFILE, HISTORICFILE    relative to LOGDIR, not checked
    #   SCRIPTDIR, ALERTDIR        colon-separated lists, each element
    #                              relative to BASEDIR

    #
    # do some sanity checks on dirs
    #
    foreach my $key ("STATEDIR", "LOGDIR") {
        $CF{$key} = "$CF{BASEDIR}/$CF{$key}" unless ($CF{$key} =~ m{^/});
        syslog ('err', "$CF{$key} does not exist") unless (-d $CF{$key});
    }

    $CF{"AUTHFILE"} = "$CF{CFBASEDIR}/$CF{AUTHFILE}"
        unless ($CF{"AUTHFILE"} =~ m{^/});
    syslog ('err', "$CF{AUTHFILE} does not exist")
        unless (-f $CF{"AUTHFILE"});

    foreach my $authtype (split (' ', $CF{"AUTHTYPE"})) {
        next unless ($authtype eq "userfile");

        $CF{"USERFILE"} = "$CF{CFBASEDIR}/$CF{USERFILE}"
            unless ($CF{"USERFILE"} =~ m{^/});
        syslog ('err', "$CF{USERFILE} does not exist")
            unless (-f $CF{"USERFILE"});
    }

    $CF{"DTLOGFILE"} = "$CF{LOGDIR}/$CF{DTLOGFILE}"
        unless ($CF{"DTLOGFILE"} =~ m{^/});

    if ($CF{"HISTORICFILE"} ne "" && $CF{"HISTORICFILE"} !~ m{^/}) {
        $CF{"HISTORICFILE"} = "$CF{LOGDIR}/$CF{HISTORICFILE}";
    }

    #
    # script and alert dirs may have multiple paths
    #
    foreach my $key ("SCRIPTDIR", "ALERTDIR") {
        my @paths;

        foreach my $path (split (/\s*:\s*/, $CF{$key})) {
            $path =~ s{/$}{};
            $path = "$CF{BASEDIR}/$path" if ($path !~ m{^/});
            syslog ('err', "$path does not exist, check your alertdir and mondir paths")
                unless (-d $path);
            push @paths, $path;
        }

        $CF{$key} = join (":", @paths);
    }
}
4697

    
4698

    
4699
#
4700
# set opstatus and save old status
4701
#
4702
sub set_op_status {
    # Record $status as the operational status of $group/$service,
    # moving the status held so far into "_last_op_status" first.
    my ($group, $service, $status) = @_;

    my $sref = \%{$watch{$group}->{$service}};

    $sref->{"_last_op_status"} = $sref->{"_op_status"};
    $sref->{"_op_status"} = $status;
}
4709

    
4710

    
4711
sub debug_dir {
    # Dump the directory and file settings held in %CF, followed by the
    # contents of the monitor ("M") and alert ("A") path hashes, to
    # STDERR.
    print STDERR <<END_OF_SETTINGS;
    basedir	[$CF{BASEDIR}]
    cfbasedir	[$CF{CFBASEDIR}]

    cf		[$CF{CF}]
    statedir	[$CF{STATEDIR}]
    logdir	[$CF{LOGDIR}]
    authfile	[$CF{AUTHFILE}]
    userfile	[$CF{USERFILE}]
    dtlogfile	[$CF{DTLOGFILE}]
    historicfile[$CF{HISTORICFILE}]
    monerrfile  [$CF{MONERRFILE}]
    scriptdir	[$CF{SCRIPTDIR}]
    alertdir	[$CF{ALERTDIR}]
END_OF_SETTINGS

    print STDERR "M $_=[$MONITORHASH{$_}]\n" foreach (keys %MONITORHASH);
    print STDERR "A $_=[$ALERTHASH{$_}]\n" foreach (keys %ALERTHASH);
}
4735

    
4736

    
4737
#
4738
# globals affected by config file are
4739
# all stored in %CF
4740
#
4741
sub init_cf_globals {
    # Load %CF with the built-in defaults of every setting the
    # configuration file may override. The command line options -b, -B,
    # -c, -L and -O (read from %opt) take precedence over the built-in
    # values of BASEDIR, CFBASEDIR, CF, LOGDIR and SYSLOG_FACILITY.
    $CF{"BASEDIR"} = $opt{"b"} || "/usr/lib/mon";
    $CF{"BASEDIR"} =~ s{/$}{};
    $CF{"CFBASEDIR"} = $opt{"B"} || "/etc/mon";
    $CF{"CF"} = $opt{"c"} || "$CF{CFBASEDIR}/mon.cf";
    # a relative config file name is taken from the working directory
    $CF{"CF"} = "$PWD/$CF{CF}" if ($CF{"CF"} !~ /^\//);
    $CF{"SCRIPTDIR"} = "/usr/local/lib/mon/mon.d:mon.d";
    $CF{"ALERTDIR"}  = "/usr/local/lib/mon/alert.d:alert.d";
    $CF{"LOGDIR"} = $opt{"L"} || (-d "/var/log/mon" ? "/var/log/mon" : "log.d");
    $CF{"STATEDIR"}  = -d "/var/state/mon" ? "/var/state/mon"
                : -d "/var/lib/mon" ? "/var/lib/mon"
                : "state.d";
    $CF{"AUTHFILE"}  = "auth.cf";
    $CF{"AUTHTYPE"}  = "getpwnam";
    $CF{"PAMSERVICE"}  = "passwd";
    $CF{"USERFILE"}  = "monusers.cf";
    $CF{"PIDFILE"}   = (-d "/var/run/mon" ? "/var/run/mon"
                    : -d "/var/run" ? "/var/run"
                    : "/etc") . "/mon.pid";
    $CF{"MONERRFILE"} = "/dev/null";
    $CF{"DTLOGFILE"} = "downtime.log";
    $CF{"DTLOGGING"} = 0;
    $CF{"MAX_KEEP"}  = 100;
    $CF{"CLIENT_TIMEOUT"} = 30;
    # fall back to port 2583 when "mon" is not a known service name
    $CF{"SERVPORT"}  = getservbyname ("mon", "tcp") || 2583;
    $CF{"TRAPPORT"}  = getservbyname ("mon", "udp") || 2583;
    # dotted-quad pattern; the dots are escaped so that they match only
    # a literal "." rather than any character
    $CF{"CLIENTALLOW"} = '\d+\.\d+\.\d+\.\d+';
    $CF{"MAXPROCS"}  = 0;
    $CF{"HISTORICFILE"} = "";
    $CF{"HISTORICTIME"} = 0;
    $CF{"DEP_RECUR_LIMIT"} = 10;
    $CF{"SYSLOG_FACILITY"} = $opt{"O"} || "daemon";
    $CF{"STARTUPALERTS_ON_RESET"} = 0;
    $CF{"MONREMOTE"} = undef;
}
4776

    
4777

    
4778
#
4779
# globals not affected by config file
4780
#
4781
sub init_globals {
    # Set the globals that the configuration file cannot change: protocol
    # versions, timing values, host name and working directory, the
    # alert/event flag bits, the trap and operational status codes, and
    # the status classification tables.
    $TRAP_PRO_VERSION = 0.3807;
    $SLEEPINT         = 1;
    $STOPPED          = 0;
    $STOPPED_TIME     = 0;
    $START_TIME       = time;
    $PROT_VERSION     = 0x2611;
    $HOSTNAME         = hostname ();
    $PWD              = getcwd ();

    #
    # flags, one bit each (1, 2, 4, ... 256)
    #
    ($FL_MONITOR, $FL_UPALERT, $FL_TRAP, $FL_TRAPTIMEOUT,
        $FL_STARTUPALERT, $FL_TEST, $FL_REDISTRIBUTE, $FL_ACKALERT,
        $FL_DISABLEALERT) = map { 1 << $_ } (0..8);

    #
    # specific trap types
    #
    ($TRAP_COLDSTART, $TRAP_WARMSTART, $TRAP_LINKDOWN, $TRAP_LINKUP,
        $TRAP_AUTHFAIL, $TRAP_EGPNEIGHBORLOSS, $TRAP_ENTERPRISE,
        $TRAP_HEARTBEAT) = (0..7);

    #
    # operational statuses
    #
    ($STAT_FAIL, $STAT_OK, $STAT_COLDSTART, $STAT_WARMSTART,
        $STAT_LINKDOWN, $STAT_UNKNOWN, $STAT_TIMEOUT, $STAT_UNTESTED,
        $STAT_DEPEND, $STAT_WARN) = (0..9);

    #
    # membership tables: a status belongs to a class when its key exists
    #
    %FAILURE = map { ($_ => 1) }
        ($STAT_FAIL, $STAT_LINKDOWN, $STAT_TIMEOUT);

    %SUCCESS = map { ($_ => 1) }
        ($STAT_OK, $STAT_COLDSTART, $STAT_WARMSTART, $STAT_UNKNOWN,
         $STAT_UNTESTED);

    %WARNING = map { ($_ => 1) }
        ($STAT_COLDSTART, $STAT_WARMSTART, $STAT_UNKNOWN, $STAT_WARN);

    #
    # status name to status code
    #
    %OPSTAT = (
        "fail"      => $STAT_FAIL,
        "ok"        => $STAT_OK,
        "coldstart" => $STAT_COLDSTART,
        "warmstart" => $STAT_WARMSTART,
        "linkdown"  => $STAT_LINKDOWN,
        "unknown"   => $STAT_UNKNOWN,
        "timeout"   => $STAT_TIMEOUT,
        "untested"  => $STAT_UNTESTED,
    );

    #
    # fast lookup hashes for alerts and monitors
    #
    %MONITORHASH = ();
    %ALERTHASH = ();
}
4848

    
4849

    
4850
#
4851
# clear timers
4852
#
4853
sub clear_timers {
    # Reset the timers and failure counters of $group/$service to their
    # configured starting values, for the service itself and for each of
    # its periods.
    #
    # Returns undef when the group or the service is not defined in
    # %watch, 1 otherwise.
    my ($group, $service) = @_;

    # look at the group level first: testing the nested element alone
    # would create an empty $watch{$group} for an unknown group
    return undef
        if (!defined $watch{$group} || !defined $watch{$group}->{$service});

    my $sref = \%{$watch{$group}->{$service}};

    $sref->{"_trap_timer"} = $sref->{"traptimeout"}
        if ($sref->{"traptimeout"});

    $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
        if ($sref->{"trapduration"});

    $sref->{"_timer"} = $sref->{"interval"}
        if ($sref->{"interval"});

    $sref->{"_consec_failures"} = 0
        if ($sref->{"_consec_failures"});

    foreach my $period (keys %{$sref->{"periods"}}) {
        my $pref = \%{$sref->{"periods"}->{$period}};

        $pref->{"_last_alert"} = 0
            if ($pref->{"alertevery"});

        $pref->{"_consec_failures"} = 0
            if ($pref->{"alertafter_consec"});

        $pref->{'_1stfailtime'} = 0
            if ($pref->{"alertafterival"});
    }

    return 1;
}
4885

    
4886

    
4887
#
4888
# load some amount of the alert history into memory
4889
#
4890
sub readhistoricfile {
    # Reload @last_alerts from the alert history file named by
    # $CF{HISTORICFILE}. Blank lines and "#" comment lines are skipped.
    # When HISTORICTIME is non-zero only alerts whose timestamp (the
    # fourth whitespace-separated field) is no older than that many
    # seconds are kept. When MAX_KEEP is defined only the last MAX_KEEP
    # entries remain. Does nothing when no history file is configured;
    # an unreadable file is reported through syslog.
    return if ($CF{"HISTORICFILE"} eq "");

    my $histfh;
    if (!open ($histfh, '<', $CF{"HISTORICFILE"})) {
        syslog ('err',  "Could not read history from $CF{HISTORICFILE} : $!");
        return;
    }

    my $epochLimit = 0;
    if ($CF{"HISTORICTIME"} != 0) {
        $epochLimit = time - $CF{"HISTORICTIME"};
    }

    @last_alerts = ();

    # a lexical line variable keeps the caller's $_ untouched
    while (my $line = <$histfh>) {
        next if ($line =~ /^\s*$/ || $line =~ /^\s*#/);
        chomp $line;
        my $epochAlert = (split (/\s+/, $line))[3];
        push (@last_alerts, $line) if ($epochAlert >= $epochLimit);
    }

    close ($histfh);

    if (defined $CF{"MAX_KEEP"}) {
        # drop the oldest entries beyond the configured maximum
        my $excess = @last_alerts - $CF{"MAX_KEEP"};
        splice (@last_alerts, 0, $excess) if ($excess > 0);
    }
}
4918

    
4919

    
4920
#
4921
# This routine simply calls an alert.
4922
#
4923
# call with %args = (
4924
#       group		=> "name of group",
4925
#       service		=> "name of service",
4926
#       pref		=> "optional period reference",
4927
#	alert		=> "alert script",
4928
#	args		=> "args to alert script",
4929
# 	flags		=> "flags, as in $FL_*",
4930
#	retval		=> "return value of monitor",
4931
#	output		=> "output of monitor",
4932
# )
4933
#
4934
sub call_alert {
    # Run one alert program and record that it was called.
    #
    # Takes the named arguments described in the header comment. "group",
    # "service", "flags", "retval", "alert" and "output" are mandatory;
    # "pref" and "args" are optional.
    #
    # The alert is run in a forked child so that a slow alert cannot
    # block the caller: the child starts the alert program through a
    # pipe, feeds it the monitor output on standard input, and exits.
    #
    # Returns undef when a mandatory argument is missing, when the alert
    # program is not known in %ALERTHASH, or when the fork fails; returns
    # 1 otherwise. Test alerts and redistributions are neither tallied
    # nor written to the alert history.
    my (%args) = @_;

    foreach my $mandatory_arg (qw(group service flags
                                  retval alert output)) {
        if (!exists $args{$mandatory_arg})
        {
            debug (1, "returning from call_alert because of missing arg $mandatory_arg\n");
            return (undef);
        }
    }

    # group members whose name starts with "*" are not passed to alerts
    my @groupargs = grep (!/^\*/, @{$groups{$args{"group"}}});

    my $tmnow = time;

    # the summary is the first line of the monitor output
    my ($summary) = split ("\n", $args{"output"});
    $summary = "(NO SUMMARY)" if (!defined $summary || $summary =~ /^\s*$/m);

    my $sref = \%{$watch{$args{"group"}}->{$args{"service"}}};
    my $pref = $args{"pref"};

    if (! defined $args{"args"}) {
        $args{"args"} = '';
    }

    if (!defined $ALERTHASH{$args{"alert"}} ||
            ! -f $ALERTHASH{$args{"alert"}}) {
        syslog ('err', "no alert found while trying to run $args{alert}");
        return undef;
    }
    my $alert = $ALERTHASH{$args{"alert"}};

    my $alerttype = "";           # sent to syslog and stored in @last_alerts
    my $alert_type = "failure";   # MON_ALERTTYPE set to this
    if ($args{"flags"} & $FL_UPALERT) {
        $alerttype = "upalert";
        $alert_type = "up";
    } elsif ($args{"flags"} & $FL_STARTUPALERT) {
        $alerttype = "startupalert";
        $alert_type = "startup";
    } elsif ($args{"flags"} & $FL_ACKALERT) {
        $alerttype = "ackalert";
        $alert_type = "ack";
    } elsif ($args{"flags"} & $FL_DISABLEALERT) {
        $alerttype = "disablealert";
        $alert_type = "disable";
    } elsif ($args{"flags"} & $FL_TRAPTIMEOUT) {
        $alerttype = "traptimeoutalert";
        $alert_type = "traptimeout";
    } elsif ($args{"flags"} & $FL_TRAP) {
        $alerttype = "trapalert";
        $alert_type = "trap";
    } elsif ($args{"flags"} & $FL_TEST) {
        $alerttype = "testalert";
        $alert_type = "test";
    } else {
        $alerttype = "alert";
    }

    #
    # log why we are triggering an alert
    #
    my $alert_name = $alert;
    $alert_name =~ s{^.*/([^/]+)$}{$1};
    syslog ("alert", "calling $alerttype $alert_name for" .
        " $args{group}/$args{service} ($alert,$args{args}) $summary")
        if (!($args{"flags"} & $FL_REDISTRIBUTE));

    # We may block while writing to the alert script, so we'll fork first,
    # allowing the master process to move on.
    my $pid = fork ();

    if (!defined ($pid)) {
        # no child, so no alert was sent: do not tally it
        syslog ('err', "could not fork to run alert $alert: $!");
        return undef;
    }

    if ($pid == 0) { ## Child
        my $alertpid = open (ALERT, "|-");
        if (!defined $alertpid) {
            syslog ('err', "could not fork: $!");
            # this is the child: it must never return into the caller
            exit (1);
        }

        #
        # grandchild, the actual alert
        #
        if ($alertpid == 0) {
            #
            # set env variables to pass to the alert
            #
            foreach my $v (keys %{$sref->{"ENV"}}) {
                $ENV{$v} = $sref->{"ENV"}->{$v};
            }

            $ENV{"MON_LAST_SUMMARY"}  = $sref->{"_last_summary"} if (defined $sref->{"_last_summary"});
            $ENV{"MON_LAST_OUTPUT"}   = $sref->{"_last_output"} if (defined $sref->{"_last_output"});
            $ENV{"MON_LAST_FAILURE"}  = $sref->{"_last_failure"} if (defined $sref->{"_last_failure"});
            $ENV{"MON_FIRST_FAILURE"} = $sref->{"_first_failure"} if (defined $sref->{"_first_failure"});
            $ENV{"MON_FIRST_SUCCESS"} = $sref->{"_first_success"} if (defined $sref->{"_first_success"});
            $ENV{"MON_LAST_SUCCESS"}  = $sref->{"_last_success"} if (defined $sref->{"_last_success"});
            $ENV{"MON_DESCRIPTION"}   = $sref->{"description"} if (defined $sref->{"description"});
            $ENV{"MON_GROUP"}         = $args{"group"} if (defined $args{"group"});
            $ENV{"MON_SERVICE"}       = $args{"service"} if (defined $args{"service"});
            $ENV{"MON_RETVAL"}        = $args{"retval"} if (defined $args{"retval"});
            $ENV{"MON_OPSTATUS"}      = $sref->{"_op_status"} if (defined $sref->{"_op_status"});
            $ENV{"MON_LAST_OPSTATUS"} = $sref->{"_last_op_status"} if (defined $sref->{"_last_op_status"});
            $ENV{"MON_ACK"}           = $sref->{"_ack_comment"} if ($sref->{"_ack"} && $sref->{"_ack_comment"} ne "");
            $ENV{"MON_ALERTTYPE"}     = $alert_type;
            $ENV{"MON_STATEDIR"}      = $CF{"STATEDIR"};
            $ENV{"MON_LOGDIR"}        = $CF{"LOGDIR"};
            $ENV{"MON_CFBASEDIR"}     = $CF{"CFBASEDIR"};

            if (defined ($sref->{"_intended"}))
            {
                $ENV{"MON_TRAP_INTENDED"} = $sref->{"_intended"};
            }

            else
            {
                undef ($ENV{"MON_TRAP_INTENDED"}) if (defined ($ENV{"MON_TRAP_INTENDED"}));
            }

            # option telling the alert what kind of event this is; when
            # several flags are set the last match below wins
            my $t;
            $t = "-u" if ($args{"flags"} & $FL_UPALERT);
            $t = "-a" if ($args{"flags"} & $FL_ACKALERT);
            $t = "-D" if ($args{"flags"} & $FL_DISABLEALERT);
            $t = "-T" if ($args{"flags"} & $FL_TRAP);
            $t = "-O" if ($args{"flags"} & $FL_TRAPTIMEOUT);

            my @execargs = (
                            $alert,
                            "-s", "$args{service}",
                            "-g", "$args{group}",
                            "-h", "@groupargs",
                            "-t", "$tmnow",
                           );

            if ($t) {
                push @execargs, $t;
            }

            if ($args{"args"} ne "") {
                push @execargs, quotewords ('\s+', 0, $args{"args"});
            }

            if (!exec @execargs) {
                syslog ('err', "could not exec alert $alert: $!");
                # this is the grandchild: leave instead of returning
                # into the caller's code
                exit (1);
            }
        }

        #
        # this will block if the alert is slow to read its input, which
        # is why we forked above
        #
        print ALERT $args{"output"};
        close (ALERT);
        exit;
    }

    #
    # test alerts and redistributions don't count
    #
    return (1) if ($args{"flags"} & ($FL_TEST | $FL_REDISTRIBUTE));

    #
    # tally this alert
    #
    if (defined $args{"pref"}) {
        $pref->{"_last_alert"} = $tmnow;
    }
    $sref->{"_alert_count"}++;

    #
    # store this in the log
    #
    shift @last_alerts if (@last_alerts > $CF{"MAX_KEEP"});

    my $alertline = "$alerttype $args{group} $args{service}" .
        " $tmnow $alert ($args{args}) $summary";
    push @last_alerts, $alertline;

    #
    # append to alert history file
    #
    if ($CF{"HISTORICFILE"} ne "") {
        my $histfh;
        if (!open ($histfh, '>>', $CF{"HISTORICFILE"})) {
            syslog ('err',  "Could not append alert history to $CF{HISTORICFILE}: $!");
        } else {
            print {$histfh} $alertline, "\n";
            close ($histfh);
        }
    }

    return 1;
}
5134

    
5135

    
5136
#
5137
# recursively evaluate a dependency expression
5138
# substitutes "GROUP:SERVICE" with "1" or "0" if the service is pass/fail, resp.
5139
#
5140
# returns an anonymous hash reference
5141
#
5142
# {
5143
#	status =>,           # "D"  recursion depth exceeded
5144
#                            # "O"  everything is OK
5145
#                            # "E"  eval error
5146
#	depend =>,           # 1 for success (no deps in a failure state)
5147
#                            # 0 if any deps failed
5148
#	error =>,            # the textual error associated with "D" or "E" status
5149
# }
5150
#
5151
sub depend {
    # Evaluate the dependency expression $depend.
    #
    #   $depend   expression containing "group:service" terms
    #   $depth    current recursion depth (callers start at 0)
    #   $deptype  dependency type to follow: a service's "depend" is used
    #             when its "dep_behavior" equals $deptype, otherwise 'a'
    #             selects "alertdepend" and 'm' selects "monitordepend"
    #
    # Every term is replaced by 1 or 0 and the resulting expression is
    # evaluated. A term counts as 1 only when the service's status is in
    # %SUCCESS, its last failure is older than its "dep_memory", and, if
    # it has dependencies of the same type itself, those evaluate true.
    #
    # Returns the hash reference described in the header comment.
    my ($depend, $depth, $deptype) = @_;
    debug (2, "checking DEP [$depend]\n");

    if ($depth > $CF{"DEP_RECUR_LIMIT"}) {
        return {
            status => "D",
            depend => undef,
            error  => "recursion too deep for ($depend)",
        };
    }

    foreach my $depstr ($depend =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g)
    {
        my ($group, $service) = split (':', $depstr);

        # an unknown group:service is evaluated as an empty service
        # (which counts as failing) without adding it to %watch
        my $sref = (defined $watch{$group} && defined $watch{$group}->{$service})
            ? $watch{$group}->{$service}
            : {};

        my $depval = undef;
        my $subdepend = "";
        if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq $deptype) {
            $subdepend = $sref->{"depend"};
        } elsif ($deptype eq 'a' && defined $sref->{"alertdepend"}) {
            $subdepend = $sref->{"alertdepend"};
        } elsif ($deptype eq 'm' && defined $sref->{"monitordepend"}) {
            $subdepend = $sref->{"monitordepend"};
        }

        #
        # disabled watches and services used to be counted as "passing"
        # now we'll use the actual values, to avoid having dependent
        # services alert when a broken service gets disabled
        #

        #
        # root dependency found
        #
        if ($subdepend eq "")
        {
            debug (2, "  found root dep $group,$service\n");

            $depval = $SUCCESS{$sref->{"_op_status"}} && ($sref->{"_last_failure_time"} < (time - $sref->{"dep_memory"}));
        }

        #
        # not a root dep, recurse
        #
        else
        {
            my $dstatus = depend ($subdepend, $depth + 1, $deptype);
            debug (2,
                "recur depth $depth returned $dstatus->{status},$dstatus->{depend}\n");

            #
            # a bad thing happened, bail out
            #
            if ($dstatus->{"status"} ne "O")
            {
                debug (2,
                    "recursive dep failure for $group,$service (status=$dstatus->{status})\n");
                return $dstatus;
            }

            $depval = $dstatus->{"depend"} && $SUCCESS{$sref->{"_op_status"}}
                      && ($sref->{"_last_failure_time"} < (time - $sref->{"dep_memory"}));
        }

        my $v = int ($depval);
        debug (2, "  ($group,$service) $depth depend=[$v][$depend]");

        #
        # Substitute whole terms only. The term is quoted because "." is
        # a regex metacharacter, and the look-arounds stop a term from
        # being replaced inside a longer one (e.g. "a:b" inside "a:b-c").
        #
        $depend =~ s/(?<![a-zA-Z0-9_.-])\Q$depstr\E(?![a-zA-Z0-9_.-])/$v/g;
        debug (2, "  depend=[$depend]\n");
    }

    debug (2, "  before eval: [$depend]");
    # NOTE(review): string eval of an expression taken from the
    # configuration; it is prefixed with $DEP_EVAL_SANDBOX.
    my $e = eval ("$DEP_EVAL_SANDBOX $depend");
    debug (2, "  after eval: [$e]\n");

    if ($@ eq "")
    {
        return {
            status => "O",
            depend => $e,
        };
    }

    return {
        status => "E",
        depend => $e,
        error  => $@,
    };
}
5254

    
5255

    
5256
#
5257
# returns undef on error
5258
#         0 if dependency failure, sets _depend_status to 0
5259
#         1 if dependencies are OK, sets _depend_status to 1
5260
#
5261
sub dep_ok
{
    # Tell whether the dependencies of the service $sref (a reference to
    # its hash in %watch) allow an action of type $deptype.
    #
    # The expression used is the service's "depend" when its
    # "dep_behavior" equals $deptype, else "alertdepend" for type 'a' or
    # "monitordepend" for type 'm'. Without an applicable expression the
    # result is 1 and "_depend_status" is left untouched.
    #
    # Returns undef on a recursion or evaluation error, 0 when a
    # dependency is failing, 1 otherwise; the latter two results are also
    # stored in $sref->{"_depend_status"}.
    my ($sref, $deptype) = @_;

    my $depend =
          (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq $deptype)
                ? $sref->{"depend"}
        : ($deptype eq 'a' && defined $sref->{"alertdepend"})
                ? $sref->{"alertdepend"}
        : ($deptype eq 'm' && defined $sref->{"monitordepend"})
                ? $sref->{"monitordepend"}
        :         "";

    return 1 if ($depend eq "");

    my $result = depend ($depend, 0, $deptype);
    my $status = $result->{"status"};

    if ($status eq "D") {
        debug (2, "dep recursion too deep\n");
        return undef;
    }

    if ($status eq "E") {
        syslog ("notice", "eval error for dependency starting at $depend: " . $result->{error});
        return undef;
    }

    if ($status eq "O" && !$result->{"depend"}) {
        $sref->{"_depend_status"} = 0;
        return 0;
    }

    $sref->{"_depend_status"} = 1;

    return 1;
}
5299

    
5300

    
5301
#
5302
# returns undef on error
5303
#         otherwise a reference to a list of summaries from all
5304
#            DIRECT dependencies currently failing
5305
sub dep_summary
{
    # Collect the summaries of the direct dependencies of the service
    # $sref that are failing now or failed within their "dep_memory"
    # window.
    #
    # The dependencies are the "group:service" terms of "depend" when
    # "dep_behavior" is "hm", else those of "hostdepend".
    #
    # Returns a reference to the list of summaries (empty when there are
    # no dependencies), or undef when a term names a group or service
    # that is not in %watch.
    my ($sref) = @_;

    my $term = qr/[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/;
    my @deps = ();

    if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq "hm") {
        @deps = ($sref->{"depend"} =~ /$term/g);
    } elsif (defined $sref->{"hostdepend"}) {
        @deps = ($sref->{"hostdepend"} =~ /$term/g);
    }

    return [] if (! @deps);

    my @sum;

    foreach my $dep (@deps) {
        my ($group, $service) = split (/:/, $dep);

        return undef
            unless (exists $watch{$group} && exists $watch{$group}->{$service});

        my $dref = \%{$watch{$group}->{$service}};

        if ($dref->{"_op_status"} == $STAT_FAIL) {
            push @sum, $dref->{"_last_summary"};
        } elsif ($dref->{"_last_failure_time"} >= (time - $dref->{"dep_memory"})) {
            push @sum, $dref->{"_last_failure_summary"};
        }
    }

    return \@sum;
}
5334
    
5335
#
5336
# convert a string to a hex-escaped string, returning
5337
# the escaped string.
5338
#
5339
# $str is the string to be escaped
5340
# if $inquotes is true, backslashes are doubled, making
5341
#       the escaped string suitable to be enclosed in
5342
#       single quotes and later passed to Text::ParseWords::quotewords.
5343
#       For example,   var='quoted value'
5344
#
5345
sub esc_str {
    # Return $str with every character whose code is 32 or below or above
    # 126, and every single or double quote, replaced by a backslash
    # followed by the character code in lowercase hex (at least two
    # digits). When $inquotes is true each backslash is doubled as well.
    # An undefined $str gives the empty string.
    my ($str, $inquotes) = @_;

    return "" if (!defined $str);

    return join ("", map {
        my $code = ord ($_);

        ($code <= 32 || $code > 126 || $_ eq "\"" || $_ eq "\'")
            ? sprintf ("\\%02x", $code)
            : ($inquotes && $_ eq "\\")
                ? "\\\\"
                : $_;
    } split (//, $str));
}
5374

    
5375

    
5376
#
5377
# convert a hex-escaped string into an unescaped string,
5378
# returning the unescaped string
5379
#
5380
sub un_esc_str {
    # Return $str with every backslash followed by two lowercase hex
    # digits replaced by the character that has that code. Anything else
    # is left as it is.
    my ($str) = @_;

    $str =~ s/\\([0-9a-f][0-9a-f])/chr (hex ($1))/ge;

    return $str;
}
5387

    
5388

    
5389
sub syslog_die {
    # Send the message to syslog at "err" priority, then die with the
    # same message (newline-terminated).
    my ($msg) = @_;

    syslog ("err", $msg);
    die "$msg\n";
}
5395

    
5396
no warnings; # Redefining syslog
sub syslog {
    # Replacement for the syslog imported from Sys::Syslog: every "%" is
    # removed from a copy of each argument, so that message text cannot
    # be taken for a format directive, and the result is passed on to
    # Sys::Syslog::syslog. A failure inside Sys::Syslog is swallowed by
    # the eval instead of killing the caller.
    my @log = @_;

    # work on the copies: @_ aliases the caller's own variables, which
    # must not be changed
    s/\%//mg foreach (@log);

    eval {
        local $SIG{"__DIE__"} = sub { };
        Sys::Syslog::syslog (@log);
    }
}
use warnings;
5405

    
5406
#
# Have a "conversation" with a PAM authentication module. This fools the
# PAM module into authenticating us non-interactively.
#
# The arguments are consumed two at a time as (code, message) pairs.
# For each pair a (PAM_SUCCESS, answer) pair is appended to the result:
# the answer is $PAM_username for PAM_PROMPT_ECHO_ON, $PAM_password for
# PAM_PROMPT_ECHO_OFF, and the empty string for any other code. The
# message itself is not looked at. A final PAM_SUCCESS terminates the
# returned list.
#
sub pam_conv_func {
    my @res;

    while (@_) {
        my $code = shift;
        shift;                  # the message of this pair is not used

        my $ans = "";
        $ans = $PAM_username if ($code == Authen::PAM::PAM_PROMPT_ECHO_ON());
        $ans = $PAM_password if ($code == Authen::PAM::PAM_PROMPT_ECHO_OFF());

        push @res, Authen::PAM::PAM_SUCCESS(), $ans;
    }

    push @res, Authen::PAM::PAM_SUCCESS();
    return @res;
}
5426

    
5427

    
5428
#
# append one record to the downtime log named by $CF{DTLOGFILE}.
#
# $sref is the hash ref of the service; its "_first_failure",
#       "interval" and "_last_summary" entries are read. a
#       "_first_failure" that is undefined or 0 is first set
#       to $START_TIME.
# $group and $service are written into the record as given.
#
# the record is a single line of space-separated fields:
#       current time, group, service, first failure time,
#       seconds since the first failure, interval, last summary
#
# $CF{DTLOGGING} is set to 1 when the log file could be opened
# and to 0 when it could not. open, write and close errors are
# reported through syslog.
#
sub write_dtlog
{
    my ($sref, $group, $service) = @_;

    my $tmnow = time;

    $sref->{"_first_failure"} = $START_TIME
        if (!defined $sref->{"_first_failure"} ||
            $sref->{"_first_failure"} == 0);

    my $dtlog;

    # three-argument open: the file name is never parsed for a mode
    if (!open ($dtlog, ">>", $CF{"DTLOGFILE"}))
    {
        syslog ('err', "could not append to $CF{DTLOGFILE}: $!");
        $CF{"DTLOGGING"} = 0;
    }

    else
    {
        $CF{"DTLOGGING"} = 1;
        print {$dtlog} ($tmnow,
            " $group",
            " $service",
            " ", 0 + $sref->{"_first_failure"},
            " ", 0 + $tmnow - $sref->{"_first_failure"},
            " ", 0 + $sref->{'interval'},
            " $sref->{'_last_summary'}\n") or
            syslog ('err', "error writing to $CF{DTLOGFILE}: $!");

        # buffered output is flushed here, so a failed write can
        # show up only at close time
        close ($dtlog) or
            syslog ('err', "error closing $CF{DTLOGFILE}: $!");
    }
}
5457
 
5458
# Perl's "system" function blocks.  We don't want the mon process to
# ever block.  So we fork then call system.  Mon will handle the
# child process cleanup elsewhere.
#
# The arguments are passed unchanged to system() in the child.
# The parent returns at once without a value and without waiting.
# The child always exits with status 0, whatever the command returned.
# If fork fails, the reason is printed to STDERR and nothing is run.
sub mysystem {
    my @args = @_;

    print STDERR "mysystem called: @args\n";

    my $pid = fork();

    if ($pid) {                     ## parent
        return;
    }

    if (defined($pid)) {            ## child
        system(@args);

        # leave without running END blocks or destructors that were
        # inherited from the parent process
        POSIX::_exit(0);
    }

    ## parent - fork failed
    print STDERR "mysystem: fork failed: $!\n";
    print STDERR "mysystem returning\n";
}
(9-9/27)