1
|
#!/usr/bin/perl
|
2
|
#
|
3
|
# mon - schedules service tests and triggers alerts upon failures
|
4
|
#
|
5
|
# Jim Trocki, trockij@arctic.org
|
6
|
#
|
7
|
# $Id: mon.pl,v 1.1 2012-10-23 19:57:32 cabo Exp $
|
8
|
#
|
9
|
# Copyright (C) 1998 Jim Trocki
|
10
|
#
|
11
|
# This program is free software; you can redistribute it and/or modify
|
12
|
# it under the terms of the GNU General Public License as published by
|
13
|
# the Free Software Foundation; either version 2 of the License, or
|
14
|
# (at your option) any later version.
|
15
|
#
|
16
|
# This program is distributed in the hope that it will be useful,
|
17
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
# GNU General Public License for more details.
|
20
|
#
|
21
|
# You should have received a copy of the GNU General Public License
|
22
|
# along with this program; if not, write to the Free Software
|
23
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
24
|
#
|
25
|
#
|
26
|
use strict;
|
27
|
|
28
|
#
# revision-control identification strings
#
my ($RCSID, $AUTHOR, $RELEASE) = (
    '$Id: mon.pl,v 1.1 2012-10-23 19:57:32 cabo Exp $',
    'trockij@arctic.org',
    '$Name: $',
);

#
# NetBSD rc.d script compatibility: on that platform only, rewrite
# the process title to "mon" followed by the command-line arguments
#
if ($^O eq "netbsd") {
    $0 = "mon" . " " . join(" ", @ARGV);
}
|
36
|
|
37
|
#
|
38
|
# modules in the perl distribution
|
39
|
#
|
40
|
use Getopt::Long qw(:config no_ignore_case);
|
41
|
use Text::ParseWords;
|
42
|
use POSIX;
|
43
|
use Fcntl;
|
44
|
use Socket;
|
45
|
use Sys::Hostname;
|
46
|
use Sys::Syslog qw(:DEFAULT);
|
47
|
use FileHandle;
|
48
|
|
49
|
use Data::Dumper;
|
50
|
|
51
|
#
|
52
|
# CPAN modules
|
53
|
#
|
54
|
use Time::HiRes qw(gettimeofday tv_interval usleep);
|
55
|
use Time::Period;
|
56
|
|
57
|
#
# forward declarations: these let the subs be called without
# parentheses (e.g. "collect_output;") ahead of their definitions.
# Grouped by initial letter; the order has no effect.
#
sub auth;

sub call_alert;         sub check_auth;         sub clear_timers;
sub client_accept;      sub client_close;       sub client_command;
sub client_dopending;   sub client_write_opstatus;
sub collect_output;

sub daemon;             sub debug;              sub debug_dir;
sub dep_ok;             sub dep_summary;        sub depend;
sub dhmstos;            sub die_die;
sub disen_host;         sub disen_service;      sub disen_watch;
sub do_alert;           sub do_startup_alerts;

sub err_startup;        sub esc_str;

sub gen_scriptdir_hash;

sub handle_io;          sub handle_trap;        sub handle_trap_timeout;
sub host_exists;        sub host_singleton_group;

sub inRange;            sub init_cf_globals;    sub init_dtlog;
sub init_globals;

sub load_auth;          sub load_state;

sub mysystem;

sub normalize_paths;

sub pam_conv_func;      sub proc_cleanup;       sub process_event;

sub randomize_startdelay;
sub read_cf;            sub readhistoricfile;   sub reload;
sub remove_proc;        sub reset_server;       sub reset_timer;
sub run_monitor;

sub save_state;         sub set_last_test;      sub set_op_status;
sub setup_server;       sub sock_write;         sub syslog_die;

sub un_esc_str;         sub usage;

sub write_dtlog;
|
116
|
|
117
|
#
|
118
|
# globals
|
119
|
#
|
120
|
my (
    %opt,               # cmdline arguments
    %CF,                # configuration directives
    $PWD,               # current working directory
    $HOSTNAME,          # system hostname
    $STOPPED,           # 1 = scheduler stopped, 0 = not stopped
    $STOPPED_TIME,      # time(2) scheduler was stopped, if stopped
    $SLEEPINT,          # don't touch
    %watch_disabled,    # watches disabled, indexed by watch
    %watch,             # main configuration file data structure
    %alias,             # aliases
    %groups,            # hostgroups, indexed by group
    %views,             # view lists, indexed by name
    %view_users,        # view preferences, per user
);

#
# I/O routine globals
#
my (
    %clients,           # fds of connected clients
    $numclients,        # count of connected clients
    %running,           # procs which are forked and running,
                        # indexed by group/service
    $iovec,             # used for select loop
    %runningpid,        # procs which are forked and running,
                        # indexed by PID
    $procs,             # number of outstanding procs
    %fhandles,          # input file handles of children
    %ibufs,             # buffer structure to hold data from children
    $fdset_rbits,
    $fdset_ebits,
);

#
# history globals
#
my (
    @last_alerts,       # alert history, in memory
    @last_failures,     # failure history, in memory
);

#
# misc. globals
#
my (
    $i,                 # loop iteration counter, used for debugging only
    $lasttm,            # the last time(2) the mon loop started
    $pid_file_owner,    # set when creating pid file
    $tm,                # used in main loop
);

#
# authentication structure globals
#
my (%AUTHCMDS, %NOAUTHCMDS, %AUTHTRAPS);

#
# PAM authentication globals (must not be lexically scoped)
#
use vars qw ( $PAM_username $PAM_password ) ;

#
# opstatus globals
#
my (
    %OPSTAT,            # operational statuses
    %FAILURE,
    %SUCCESS,
    %WARNING,
);

# trap types
my (
    $TRAP_COLDSTART,
    $TRAP_WARMSTART,
    $TRAP_LINKDOWN,
    $TRAP_LINKUP,
    $TRAP_AUTHFAIL,
    $TRAP_EGPNEIGHBORLOSS,
    $TRAP_ENTERPRISE,
    $TRAP_HEARTBEAT,
);

# _op_status values
my (
    $STAT_FAIL,
    $STAT_OK,
    $STAT_COLDSTART,
    $STAT_WARMSTART,
    $STAT_LINKDOWN,
    $STAT_UNKNOWN,
    $STAT_TIMEOUT,
    $STAT_UNTESTED,
    $STAT_DEPEND,
    $STAT_WARN,
);

# alert type flags
my (
    $FL_MONITOR,
    $FL_UPALERT,
    $FL_TRAP,
    $FL_TRAPTIMEOUT,
    $FL_STARTUPALERT,
    $FL_TEST,
    $FL_REDISTRIBUTE,
    $FL_ACKALERT,
    $FL_DISABLEALERT,
);

my (
    $TRAP_PDU,
    %ALERTHASH,         # hash of pathnames for alerts
    %MONITORHASH,       # hash of pathnames for monitors
    $PROT_VERSION,
    $START_TIME,        # time(2) server started
    $TRAP_PRO_VERSION,  # trap protocol version
    $DEP_EVAL_SANDBOX,  # perl environment for dep evals
);
|
203
|
|
204
|
#
|
205
|
# argument parsing
|
206
|
#
|
207
|
#
# parse the command line into %opt; each value is stored under the
# single-letter form of the option name
#
my $getopt_result = GetOptions(\%opt,
    qw/
        A|authfile=s
        B|cfbasedir=s
        D|statedir=s
        L|logdir=s
        M|m4:s
        O|syslogfacility=s
        P|pidfile=s
        S|stopped
        a|alertdir=s
        b|basedir=s
        c|configfile=s
        d|debug+
        f|fork
        h|help
        i|sleep=i
        k|maxkeep=i
        l|loadstate:s
        m|maxprocs=i
        p|port=i
        r|randstart=s
        s|scriptdir=s
        t|trapport=i
        v|version
    /);

#
# a command line that GetOptions rejects is an error: show the usage
# text and exit non-zero so that the caller (init script, shell) can
# tell the failure apart from a successful run
#
if (!$getopt_result) {
    usage();
    exit 1;
}

#
# these two things can be taken care of without
# initializing things further
#
if ($opt{"v"}) {
    print "$RCSID\n$RELEASE\n";
    exit 0;
}

if ($opt{"h"}) {
    usage();
    exit 0;
}

#
# debugging output needs Data::Dumper; refuse to start if it
# cannot be loaded
#
if ($opt{"d"}) {
    eval { require Data::Dumper; };

    if ($@ ne "") {
        die "error: $@\n";
    }
}
|
262
|
|
263
|
#
# choose the syslog transport by platform; on any other platform
# setlogsock is not called at all
#
{
    my $sock_type;

    if ($^O eq "solaris") {
        $sock_type = 'stream';
    }
    elsif ($^O eq "linux" || $^O =~ /^(open|free|net)bsd$/ || $^O eq "aix") {
        $sock_type = 'unix';
    }

    Sys::Syslog::setlogsock ($sock_type) if (defined $sock_type);
}

#
# NOTE(review): nothing visible above assigns $CF{"SYSLOG_FACILITY"}
# before this first openlog; the log is reopened below once the
# configuration has been read
#
openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});

#
# definitions
#
if ($opt{"b"} && ! -d $opt{"b"}) {
    die "basedir $opt{b} does not exist\n";
}

init_globals();
init_cf_globals();

unless (-f $CF{"CF"}) {
    syslog_die ("config file $CF{CF} does not exist");
}

#
# read config file; read_cf returns an empty string on success
# and an error message otherwise
#
{
    my $err = read_cf ($CF{"CF"}, 1);
    syslog_die ("$err") if ($err ne "");
}

#
# reopen the log with the facility in effect after the config read
#
closelog;

openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});
|
295
|
|
296
|
#
|
297
|
# cmdline args override config file
|
298
|
#
|
299
|
#
# options which replace the configured value only when they were
# given a true value on the command line: [ option letter, %CF key ]
#
{
    my @overrides = (
        [ "a", "ALERTDIR"  ],
        [ "b", "BASEDIR"   ],
        [ "A", "AUTHFILE"  ],
        [ "L", "LOGDIR"    ],
        [ "D", "STATEDIR"  ],
        [ "s", "SCRIPTDIR" ],
        [ "k", "MAX_KEEP"  ],
        [ "m", "MAXPROCS"  ],
        [ "p", "SERVPORT"  ],
        [ "t", "TRAPPORT"  ],
    );

    foreach my $pair (@overrides) {
        my ($letter, $cf_key) = @{$pair};
        $CF{$cf_key} = $opt{$letter} if ($opt{$letter});
    }
}

#
# the pid file is tested with defined() instead, which
# allows an empty pidfile to be given
#
if (defined ($opt{"P"})) {
    $CF{"PIDFILE"} = $opt{"P"};
}

if ($opt{"i"}) {
    $SLEEPINT = $opt{"i"};
}

#
# -r takes a time value which dhmstos must be able to convert
#
if ($opt{"r"}) {
    if (!defined (dhmstos ($opt{"r"}))) {
        syslog_die ("bad randstart value");
    }
    $CF{"RANDSTART"} = dhmstos($opt{"r"});
}

#
# -S starts with the scheduler stopped, and records when
#
if ($opt{"S"}) {
    ($STOPPED, $STOPPED_TIME) = (1, time);
}
|
323
|
|
324
|
|
325
|
#
|
326
|
# do some path cleanups and
|
327
|
# build lookup tables for alerts and monitors
|
328
|
#
|
329
|
normalize_paths();
gen_scriptdir_hash();

debug_dir() if ($opt{"d"});

#
# load the auth control, bind, and listen
#
load_auth (1);
load_view_users (1);

#
# init client interface
# %clients is an I/O structure, indexed by the fd of the client
# $numclients is the number of clients currently connected
# $iovec is fd_set for clients and traps
#
%clients = ();
$numclients = 0;
$iovec = '';
setup_server();

#
# fork and become a daemon
#
init_dtlog() if ($CF{"DTLOGGING"});
daemon() if ($opt{"f"});

#
# write the pid file (skipped when the pid file name is empty).
# This happens after daemon(), so $$ is presumably the pid of the
# daemonized process. $pid_file_owner is only set when the file was
# actually created, which is what the END block checks before
# removing it. A failure to create the file is logged rather than
# silently ignored; it is not fatal.
#
if ($CF{"PIDFILE"} ne '') {
    if (open (my $pid_fh, '>', $CF{"PIDFILE"})) {
        $pid_file_owner = $$;
        print {$pid_fh} "$pid_file_owner\n";
        close ($pid_fh);
    }
    else {
        syslog ('err', "could not write pid file %s: %s", $CF{"PIDFILE"}, "$!");
    }
}

set_last_test ();

#
# randomize startup checks if asked to
#
randomize_startdelay() if ($CF{"RANDSTART"});

@last_alerts = ();
@last_failures = ();
readhistoricfile ();

$procs = 0;
$i = 0;
$lasttm = time;
$fdset_rbits = $fdset_ebits = '';
%watch_disabled = ();

$SIG{HUP} = \&reset_server;
$SIG{INT} = \&handle_sigterm;       # for interactive debugging
$SIG{TERM} = \&handle_sigterm;
$SIG{PIPE} = 'IGNORE';

#
# load previously saved state
#
# If -l was given an argument (all, disabled, opstatus, etc...)
# pass that to load_state, otherwise default to the old behavior
# of just loading disabled hosts/services/groups
#
if (exists $opt{"l"}) {
    load_state ($opt{"l"} ? $opt{"l"} : "disabled");
}

syslog ('info', "mon server started");

#
# startup alerts
#
do_startup_alerts();
|
407
|
|
408
|
#
|
409
|
# main monitoring loop
|
410
|
#
|
411
|
for (;;) {
    debug (1, "$i" . ($STOPPED ? " (stopped)" : "") . "\n");
    $i++;
    $tm = time;

    #
    # step through the watch groups, decrementing and
    # handling expired timers
    #
    if (!$STOPPED) {
        my $global_excl = $CF{"EXCLUDE_PERIOD"};

        if (defined $global_excl && $global_excl ne "" &&
            inPeriod (time, $global_excl))
        {
            debug (1, "not running monitors because of global exclude_period\n");
        }
        else {
            foreach my $group (keys %watch) {
                SERVICE:
                foreach my $service (keys %{$watch{$group}}) {
                    my $sref = \%{$watch{$group}->{$service}};

                    #
                    # seconds since the end of the previous pass,
                    # counted as at least one
                    #
                    my $t = $tm - $lasttm;
                    $t = 1 if ($t <= 0);

                    #
                    # trap timer
                    #
                    if ($sref->{"traptimeout"}) {
                        $sref->{"_trap_timer"} -= $t;

                        if ($sref->{"_trap_timer"} <= 0 &&
                            $tm - $sref->{"_last_trap"} > $sref->{"traptimeout"})
                        {
                            $sref->{"_trap_timer"} = $sref->{"traptimeout"};
                            handle_trap_timeout ($group, $service);
                        }
                    }

                    #
                    # trap duration timer
                    #
                    if (defined ($sref->{"_trap_duration_timer"})) {
                        $sref->{"_trap_duration_timer"} -= $t;

                        if ($sref->{"_trap_duration_timer"} <= 0) {
                            set_op_status ($group, $service, $STAT_OK);
                            undef $sref->{"_trap_duration_timer"};
                        }
                    }

                    #
                    # polling monitor timer: a service is due when it
                    # has an interval, its timer has run out, and no
                    # monitor is already running for it. Otherwise
                    # just count its timer down, stopping at zero.
                    #
                    my $due = $sref->{"interval"}
                        && $sref->{"_timer"} <= 0
                        && !$running{"$group/$service"};

                    if (!$due) {
                        $sref->{"_timer"} -= $t;
                        $sref->{"_timer"} = 0 if ($sref->{"_timer"} < 0);
                        next SERVICE;
                    }

                    #
                    # respect the process limit, if one is set
                    #
                    if ($CF{"MAXPROCS"} && !($procs < $CF{"MAXPROCS"})) {
                        syslog ('info', "throttled at $procs processes");
                        next SERVICE;
                    }

                    #
                    # per-service exclude period
                    #
                    my $svc_excl = $sref->{"exclude_period"};

                    if (defined $svc_excl && $svc_excl ne "" &&
                        inPeriod (time, $svc_excl))
                    {
                        debug (1, "not running $group,$service because of exclude_period\n");
                        next SERVICE;
                    }

                    #
                    # dependencies which gate the monitor itself: either
                    # "depend" with dep_behavior "m", or "monitordepend"
                    #
                    my $has_monitor_dep =
                        ($sref->{"dep_behavior"} eq "m" &&
                            defined $sref->{"depend"} && $sref->{"depend"} ne "")
                        || (defined $sref->{"monitordepend"} && $sref->{"monitordepend"} ne "");

                    if ($has_monitor_dep && !dep_ok ($sref, 'm')) {
                        debug (1, "not running $group,$service because of depend\n");
                        next SERVICE;
                    }

                    run_monitor ($group, $service);
                }
            }
        }
    }

    $lasttm = time;

    #
    # collect any output from subprocs
    #
    collect_output();

    #
    # clean up after exited processes, and trigger alerts
    #
    proc_cleanup();

    #
    # handle client, server, and trap I/O
    # this routine sleeps for $SLEEPINT if no I/O is ready
    #
    handle_io();
}
|
533
|
|
534
|
die "not reached";

#
# at exit, remove the pid file, but only in the process whose pid
# was recorded in $pid_file_owner when the file was created
# (presumably so that forked children leave it alone), and only
# when a pid file name is configured
#
END {
    if ($$ == $pid_file_owner && $CF{"PIDFILE"} ne '') {
        unlink $CF{"PIDFILE"};
    }
}
|
539
|
|
540
|
|
541
|
##############################################################################
|
542
|
|
543
|
#
|
544
|
# startup alerts
|
545
|
#
|
546
|
sub do_startup_alerts {
    #
    # call do_alert once for every group/service pair in %watch,
    # with empty output, a return value of 0 and the
    # $FL_STARTUPALERT flag
    #
    for my $grp (keys %watch) {
        my @services = keys %{$watch{$grp}};

        for my $svc (@services) {
            do_alert ($grp, $svc, "", 0, $FL_STARTUPALERT);
        }
    }
}
|
553
|
|
554
|
|
555
|
#
|
556
|
# handle alert event, throttling the alert call if necessary
|
557
|
#
|
558
|
sub do_alert {
    #
    # Decide whether an alert event for a service should result in
    # alerts and, if so, run them through call_alert.
    #
    # Arguments:
    #   $group, $service  keys into %watch identifying the service
    #   $output           text for the event; its first line is
    #                     used as the summary
    #   $retval           exit value, matched against "alertexitrange"
    #                     and against per-alert "exit=" ranges
    #   $flags            bitmask of $FL_* alert type flags
    #
    # The return value is not meaningful.
    #
    my ($group, $service, $output, $retval, $flags) = @_;

    debug (1, "do_alert flags=$flags\n");

    my $sref = \%{$watch{$group}->{$service}};

    my $tmnow = time;

    if ($STOPPED) {
        syslog ("notice", "ignoring alert for $group,$service because the mon scheduler is stopped");
        return;
    }

    #
    # if redistribute is set, call it now. This happens before the
    # disabled, dependency and ack checks below.
    #
    if ($sref->{"redistribute"} ne '')
    {
        my ($fac, $args) = split (/\s+/, $sref->{"redistribute"}, 2);

        call_alert (
            group       => $group,
            service     => $service,
            output      => $output,
            retval      => $retval,
            flags       => $flags | $FL_REDISTRIBUTE,

            alert       => $fac,
            args        => $args,
        );
    }

    #
    # if the alarm is disabled, ignore it
    #
    if ((exists $watch_disabled{$group} && $watch_disabled{$group} == 1)
        || (defined $sref->{"disable"} && $sref->{"disable"} == 1))
    {
        syslog ("notice", "ignoring alert for $group,$service");
        return;
    }

    #
    # dependency check (not applied to startup alerts or upalerts)
    #
    if (!($flags & $FL_STARTUPALERT) &&
        !($flags & $FL_UPALERT) &&
        ((defined $sref->{"depend"} && $sref->{"dep_behavior"} eq "a")
            || (defined $sref->{"alertdepend"})))
    {
        if (!$sref->{"_depend_status"})
        {
            debug (1, "alert for $group,$service supressed because of dep fail\n");
            return;
        }
    }

    #
    # summaries are the first line of the current output and of the
    # stored failure output
    #
    my ($summary) = split("\n", $output);
    $summary = "(NO SUMMARY)" if (!defined $summary || $summary =~ /^\s*$/m);

    #
    # declare first, assign conditionally: "my ... if ..." in a
    # single statement has undefined behavior in Perl
    #
    my $prevsumm;
    ($prevsumm) = split("\n", $sref->{"_failure_output"}) if (defined $sref->{"_failure_output"});
    $prevsumm = "(NO SUMMARY)" if (!defined $prevsumm || $prevsumm =~ /^\s*$/m);

    # whitespace is ignored when deciding whether the summary changed
    my $strippedsummary = $summary;
    $strippedsummary =~ s/\s//mg;
    my $strippedprevious = $prevsumm;
    $strippedprevious =~ s/\s//mg;

    #
    # If the summary changed, un-acknowledge the service if
    # 'unack_summary' is set, and restart the per-period counters
    #
    if ($sref->{'_ack'} != 0
        && $sref->{'unack_summary'} == 1
        && $strippedsummary ne $strippedprevious
        && !($flags & ($FL_UPALERT|$FL_ACKALERT|$FL_DISABLEALERT)))
    {
        print STDERR "Unacking $group/$service:\nSummary: X".$strippedsummary."X\nPrevious: X".$strippedprevious."X\n";
        $sref->{"_ack"} = 0;
        $sref->{"_ack_comment"} = "";
        $sref->{"_consec_failures"} = 1;

        foreach my $period (keys %{$sref->{"periods"}})
        {
            my $p = $sref->{"periods"}->{$period};

            # "_alert_sent" is deliberately left alone here
            $p->{"_last_alert"} = 0;
            $p->{"_1stfailtime"} = 0;
            $p->{"_failcount"} = 0;
        }
    }

    #
    # no alerts for ack'd failures, except for upalerts,
    # ackalerts and disablealerts
    #
    if ($sref->{"_ack"} != 0 && !($flags & ($FL_UPALERT|$FL_ACKALERT|$FL_DISABLEALERT)))
    {
        syslog ("debug", "no alert for $group.$service" .
            " because of ack'd failure");
        return;
    }

    #
    # check each time period for pending alerts
    #
    PERIOD:
    foreach my $periodlabel (keys %{$sref->{"periods"}})
    {
        #
        # only send alerts that are in the proper period
        #
        next PERIOD if (!inPeriod ($tmnow, $sref->{"periods"}->{$periodlabel}->{"period"}));

        my $pref = \%{$sref->{"periods"}->{$periodlabel}};

        #
        # skip upalerts/ackalerts not paired with down alerts
        # disable by setting "no_comp_alerts" in period section
        #
        if (!$pref->{"no_comp_alerts"} && ($flags & ($FL_UPALERT | $FL_ACKALERT)) && !$pref->{"_alert_sent"})
        {
            syslog ('debug', "$group/$service/$periodlabel: Suppressing upalert since no down alert was sent.") if ($flags & $FL_UPALERT);
            syslog ('debug', "$group/$service/$periodlabel: Suppressing ackalert since no down alert was sent.") if ($flags & $FL_ACKALERT);
            next PERIOD;
        }

        #
        # skip looping upalerts when "no_comp_alerts" set.
        #
        if ($pref->{"no_comp_alerts"} && ($flags & $FL_UPALERT) && ($pref->{"_no_comp_alerts_upalert_sent"}>0))
        {
            next PERIOD;
        }

        #
        # do this if we're not handling an upalert, startupalert, ackalert, or disablealert
        #
        if (!($flags & $FL_UPALERT) && !($flags & $FL_STARTUPALERT) && !($flags & $FL_DISABLEALERT) && !($flags & $FL_ACKALERT))
        {
            #
            # alert only when exit code matches
            #
            if (exists $pref->{"alertexitrange"}) {
                next PERIOD if (!inRange($retval, $pref->{"alertexitrange"}));
            }

            #
            # alert only numalerts
            #
            if ($pref->{"numalerts"} &&
                $pref->{"_alert_sent"} >= $pref->{"numalerts"})
            {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert since numalerts is met.");
                next PERIOD;
            }

            #
            # only alert once every "alertevery" seconds, unless
            # output from monitor is different or if strict alertevery
            #
            # strict and _ignore_summary are basically the same though
            # strict short-circuits and overrides other settings and exists
            # for compatibility with pre-1.1 configs
            #
            if ($pref->{"alertevery"} != 0 &&                                   # if alertevery is set and
                ($tmnow - $pref->{"_last_alert"} < $pref->{"alertevery"}) &&    # we're within the time period and one of these:
                (($pref->{"_alertevery_strict"}) ||                             # [ strict is set or
                 ($pref->{"_observe_detail"} && $sref->{"_failure_output"} eq $output) ||   # observing detail and output hasn't changed or
                 (!$pref->{"_observe_detail"} && (!$pref->{"_ignore_summary"}) && ($prevsumm eq $summary)) ||   # not observing detail
                                                                                # and not ignoring summary and summ hasn't changed or
                 ($pref->{"_ignore_summary"})))                                 # we're ignoring summary changes ]
            {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertevery.");
                next PERIOD;
            }

            #
            # alertafter NUM
            #
            if (defined $pref->{"alertafter_consec"} && ($sref->{"_consec_failures"} < $pref->{"alertafter_consec"}))
            {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter consecutive failures.");
                next PERIOD;
            }

            #
            # alertafter timeval
            #
            elsif ( (!defined ($pref->{"alertafter"})) && (defined ($pref->{"alertafterival"})) )
            {
                $pref->{'_1stfailtime'} = $tmnow if $pref->{'_1stfailtime'} == 0;
                if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'})
                {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter numval.");
                    next PERIOD;
                }
            }

            #
            # alertafter NUM timeval
            #
            elsif (defined ($pref->{"alertafter"}))
            {
                $pref->{"_failcount"}++;

                if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'} &&
                    $pref->{"_failcount"} < $pref->{"alertafter"})
                {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter num timeval.");
                    next PERIOD;
                }

                #
                # start a new time interval
                #
                if ($tmnow - $pref->{'_1stfailtime'} > $pref->{'alertafterival'})
                {
                    $pref->{"_failcount"} = 1;
                }

                if ($pref->{"_failcount"} == 1)
                {
                    $pref->{"_1stfailtime"} = $tmnow;
                }

                if ($pref->{"_failcount"} < $pref->{"alertafter"})
                {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter num timeval.");
                    next PERIOD;
                }
            }
        }

        #
        # at this point, no alerts are blocked,
        # so send the alerts
        #

        #
        # pick the alert list of this period which matches the
        # type of event being handled
        #
        my @alerts;

        if ($flags & $FL_UPALERT)
        {
            @alerts = @{$pref->{"upalerts"}};
        }
        elsif ($flags & $FL_STARTUPALERT)
        {
            @alerts = @{$pref->{"startupalerts"}};
        }
        elsif ($flags & $FL_DISABLEALERT)
        {
            @alerts = @{$pref->{"disablealerts"}};
        }
        elsif ($flags & $FL_ACKALERT)
        {
            @alerts = @{$pref->{"ackalerts"}};
        }
        else
        {
            @alerts = @{$pref->{"alerts"}};
        }

        #
        # trigger multiple alerts in this period, counting
        # those for which call_alert returns true
        #
        my $called = 0;

        foreach my $alertspec (@alerts)
        {
            my ($fac, $args);

            #
            # an alert may begin with "exit=RANGE", in which case it
            # only applies when $retval falls in that range
            #
            if ($alertspec =~ /^exit\s*=\s*((\d+|\d+-\d+))\s/i)
            {
                my $range = $1;
                next if (!inRange($retval, $range));
                ($fac, $args) = (split (/\s+/, $alertspec, 3))[1,2];
            }
            else
            {
                ($fac, $args) = split (/\s+/, $alertspec, 2);
            }

            $called++ if (call_alert (
                    group       => $group,
                    service     => $service,
                    output      => $output,
                    retval      => $retval,
                    flags       => $flags,

                    pref        => $pref,
                    alert       => $fac,
                    args        => $args,
                )
            );
        }

        #
        # reset _alert_sent if up alert was sent from a trap
        #
        if ($called)
        {
            #
            # NOTE(review): "$FL_TRAP | $flags" is a bitwise OR, so if
            # $FL_TRAP is non-zero this test reduces to
            # ($flags & $FL_UPALERT) and every upalert resets the
            # counters, not just those from traps. The comment above
            # suggests "&" was intended; left unchanged because the
            # alert bookkeeping elsewhere may rely on it. Confirm
            # before changing.
            #
            if( (($FL_TRAP | $flags) && ($FL_UPALERT & $flags)) ) {
                $pref->{"_alert_sent"} = 0;
                $pref->{"_last_alert"} = 0;
            }
            else {
                $pref->{"_alert_sent"}++;

                #
                # reset _no_comp_alerts_upalert_sent counter - when service will be
                # back up, upalert will be sent.
                #
                if ($pref->{"no_comp_alerts"}) {
                    $pref->{"_no_comp_alerts_upalert_sent"} = 0;
                }
            }

            if ($pref->{"no_comp_alerts"} && ($flags & $FL_UPALERT)) {
                $pref->{"_no_comp_alerts_upalert_sent"}++;
            }
        }
    }
}
|
875
|
|
876
|
|
877
|
|
878
|
#
|
879
|
# walk through the watch list and reset the time
|
880
|
# the service was last called
|
881
|
#
|
882
|
sub set_last_test {
    #
    # for every service of every group in %watch, reload the
    # countdown "_timer" from the service's "interval"
    #
    foreach my $group (keys %watch)
    {
        foreach my $service (keys %{$watch{$group}})
        {
            $watch{$group}->{$service}->{"_timer"} = $watch{$group}->{$service}->{"interval"};
        }
    }
}
|
894
|
|
895
|
|
896
|
#
|
897
|
# parse configuration file
|
898
|
#
|
899
|
# build the following data structures:
|
900
|
#
|
901
|
# %group
|
902
|
# each element of %group is an array of hostnames
|
903
|
# group records are terminated by a blank line in the
|
904
|
# configuration file
|
905
|
# %watch{"group"}->{"service"}->{"variable"} = value
|
906
|
# %alias
|
907
|
#
|
908
|
sub read_cf {
|
909
|
my ($CF, $commit) = @_;
|
910
|
my ($var, $watchgroup, $ingroup, $curgroup, $inwatch,
|
911
|
$args, $hosts, %disabled, $h, $i,
|
912
|
$inalias, $curalias, $inview, $curview);
|
913
|
my ($sref, $pref);
|
914
|
my ($service, $period);
|
915
|
my ($authtype, @authtypes);
|
916
|
my $line_num = 0;
|
917
|
|
918
|
#
|
919
|
# parse configuration file
|
920
|
#
|
921
|
if (exists($opt{"M"}) || $CF =~ /\.m4$/)
|
922
|
{
|
923
|
my $m4 = "m4";
|
924
|
$m4 = $opt{"M"} if (defined($opt{"M"}));
|
925
|
return "could not open m4 pipe of cf file: $CF: $!"
|
926
|
if (!open (CFG, "$m4 $CF |"));
|
927
|
}
|
928
|
|
929
|
else
|
930
|
{
|
931
|
return "could not open cf file: $CF: $!"
|
932
|
if (!open (CFG, $CF));
|
933
|
}
|
934
|
|
935
|
#
|
936
|
# buffers to hold the new un-committed config
|
937
|
#
|
938
|
my %new_alias = ();
|
939
|
my %new_views = ();
|
940
|
my %new_CF = %CF;
|
941
|
my %new_groups;
|
942
|
my %new_watch;
|
943
|
|
944
|
my %is_watch;
|
945
|
|
946
|
my $servnum = 0;
|
947
|
|
948
|
my $DEP_BEHAVIOR = "a";
|
949
|
my $DEP_MEMORY = 0;
|
950
|
my $UNACK_SUMMARY = 0;
|
951
|
|
952
|
my $incomplete_line = 0;
|
953
|
my $linepart = "";
|
954
|
my $l = "";
|
955
|
my $acc_line = "";
|
956
|
|
957
|
for (;;)
|
958
|
{
|
959
|
#
|
960
|
# read in a logical "line", which may span actual lines
|
961
|
#
|
962
|
do
|
963
|
{
|
964
|
$line_num++;
|
965
|
last if (!defined ($linepart = <CFG>));
|
966
|
next if $linepart =~ /^\s*#/;
|
967
|
|
968
|
#
|
969
|
# accumulate multi-line lines (ones which are \-escaped)
|
970
|
#
|
971
|
if ($incomplete_line) { $linepart =~ s/^\s*//; }
|
972
|
|
973
|
if ($linepart =~ /^(.*)\\\s*$/)
|
974
|
{
|
975
|
$incomplete_line = 1;
|
976
|
$acc_line .= $1;
|
977
|
chomp $acc_line;
|
978
|
next;
|
979
|
}
|
980
|
|
981
|
else
|
982
|
{
|
983
|
$acc_line .= $linepart;
|
984
|
}
|
985
|
|
986
|
$l = $acc_line;
|
987
|
$acc_line = "";
|
988
|
|
989
|
chomp $l;
|
990
|
$l =~ s/^\s*//;
|
991
|
$l =~ s/\s*$//;
|
992
|
|
993
|
$incomplete_line = 0;
|
994
|
$linepart = "";
|
995
|
};
|
996
|
|
997
|
#
|
998
|
# global variables which can be overriden by the command line
|
999
|
#
|
1000
|
if (!$inwatch && $l =~ /^(\w+) \s* = \s* (.*) \s*$/ix)
|
1001
|
{
|
1002
|
if ($1 eq "alertdir") {
|
1003
|
$new_CF{"ALERTDIR"} = $2;
|
1004
|
|
1005
|
} elsif ($1 eq "basedir") {
|
1006
|
$new_CF{"BASEDIR"} = $2;
|
1007
|
$new_CF{"BASEDIR"} = "$PWD/$new_CF{BASEDIR}" if ($new_CF{"BASEDIR"} !~ m{^/});
|
1008
|
$new_CF{"BASEDIR"} =~ s{/$}{};
|
1009
|
|
1010
|
} elsif ($1 eq "cfbasedir") {
|
1011
|
$new_CF{"CFBASEDIR"} = $2;
|
1012
|
$new_CF{"CFBASEDIR"} = "$PWD/$new_CF{CFBASEDIR}" if ($new_CF{"CFBASEDIR"} !~ m{^/});
|
1013
|
$new_CF{"CFBASEDIR"} =~ s{/$}{};
|
1014
|
|
1015
|
} elsif ($1 eq "mondir") {
|
1016
|
$new_CF{"SCRIPTDIR"} = $2;
|
1017
|
|
1018
|
} elsif ($1 eq "logdir") {
|
1019
|
$new_CF{"LOGDIR"} = $2;
|
1020
|
|
1021
|
} elsif ($1 eq "histlength") {
|
1022
|
$new_CF{"MAX_KEEP"} = $2;
|
1023
|
|
1024
|
} elsif ($1 eq "serverport") {
|
1025
|
$new_CF{"SERVPORT"} = $2;
|
1026
|
|
1027
|
} elsif ($1 eq "trapport") {
|
1028
|
$new_CF{"TRAPPORT"} = $2;
|
1029
|
|
1030
|
} elsif ($1 eq "serverbind") {
|
1031
|
$new_CF{"SERVERBIND"} = $2;
|
1032
|
|
1033
|
} elsif ($1 eq "clientallow") {
|
1034
|
$new_CF{"CLIENTALLOW"}= $2;
|
1035
|
|
1036
|
} elsif ($1 eq "trapbind") {
|
1037
|
$new_CF{"TRAPBIND"} = $2;
|
1038
|
|
1039
|
} elsif ($1 eq "pidfile") {
|
1040
|
$new_CF{"PIDFILE"} = $2;
|
1041
|
|
1042
|
} elsif ($1 eq "randstart") {
|
1043
|
$new_CF{"RANDSTART"} = dhmstos($2);
|
1044
|
if (!defined ($new_CF{"RANDSTART"})) {
|
1045
|
close (CFG);
|
1046
|
return "cf error: bad value '$2' for randstart option (syntax: randstart = timeval), line $line_num";
|
1047
|
}
|
1048
|
|
1049
|
} elsif ($1 eq "maxprocs") {
|
1050
|
$new_CF{"MAXPROCS"} = $2;
|
1051
|
|
1052
|
} elsif ($1 eq "statedir") {
|
1053
|
$new_CF{"STATEDIR"} = $2;
|
1054
|
|
1055
|
} elsif ($1 eq "authfile") {
|
1056
|
$new_CF{"AUTHFILE"} = $2;
|
1057
|
if (! -r $new_CF{"AUTHFILE"}) {
|
1058
|
close (CFG);
|
1059
|
return "cf error: authfile '$2' does not exist or is not readable, line $line_num";
|
1060
|
}
|
1061
|
|
1062
|
} elsif ($1 eq "authtype") {
|
1063
|
$new_CF{"AUTHTYPE"} = $2;
|
1064
|
@authtypes = split(' ' , $new_CF{"AUTHTYPE"}) ;
|
1065
|
foreach $authtype (@authtypes) {
|
1066
|
if ($authtype eq "pam") {
|
1067
|
eval 'use Authen::PAM qw(:constants);' ;
|
1068
|
if ($@ ne "") {
|
1069
|
close (CFG);
|
1070
|
return "cf error: could not use PAM authentication: $@";
|
1071
|
}
|
1072
|
}
|
1073
|
}
|
1074
|
|
1075
|
} elsif ($1 eq "pamservice") {
|
1076
|
$new_CF{"PAMSERVICE"} = $2;
|
1077
|
|
1078
|
} elsif ($1 eq "userfile") {
|
1079
|
$new_CF{"USERFILE"} = $2;
|
1080
|
if (! -r $new_CF{"USERFILE"}) {
|
1081
|
close (CFG);
|
1082
|
return "cf error: userfile '$2' does not exist or is not readable, line $line_num";
|
1083
|
}
|
1084
|
|
1085
|
} elsif ($1 eq "historicfile") {
|
1086
|
$new_CF{"HISTORICFILE"} = $2;
|
1087
|
|
1088
|
} elsif ($1 eq "historictime") {
|
1089
|
$new_CF{"HISTORICTIME"} = dhmstos($2);
|
1090
|
if (!defined $new_CF{"HISTORICTIME"}) {
|
1091
|
close (CFG);
|
1092
|
return "cf error: bad value '$2' for historictime command (syntax: historictime = timeval), line $line_num";
|
1093
|
}
|
1094
|
|
1095
|
} elsif ($1 eq "cltimeout") {
|
1096
|
$new_CF{"CLIENT_TIMEOUT"} = dhmstos($2);
|
1097
|
if (!defined ($new_CF{"CLIENT_TIMEOUT"})) {
|
1098
|
close (CFG);
|
1099
|
return "cf error: bad value '$2' for cltimeout command (syntax: cltimeout = secs), line $line_num";
|
1100
|
}
|
1101
|
|
1102
|
} elsif ($1 eq "monerrfile") {
|
1103
|
$new_CF{"MONERRFILE"} = $2;
|
1104
|
|
1105
|
} elsif ($1 eq "dtlogfile") {
|
1106
|
$new_CF{"DTLOGFILE"} = $2;
|
1107
|
|
1108
|
} elsif ($1 eq "dtlogging") {
|
1109
|
$new_CF{"DTLOGGING"} = 0;
|
1110
|
if ($2 == 1 || $2 eq "yes" || $2 eq "true") {
|
1111
|
$new_CF{"DTLOGGING"} = 1;
|
1112
|
}
|
1113
|
|
1114
|
} elsif ($1 eq "dep_recur_limit") {
|
1115
|
$new_CF{"DEP_RECUR_LIMIT"} = $2;
|
1116
|
|
1117
|
} elsif ($1 eq "dep_behavior") {
|
1118
|
if ($2 ne "m" && $2 ne "a" && $2 ne "hm") {
|
1119
|
close (CFG);
|
1120
|
return "cf error: unknown dependency behavior '$2', line $line_num";
|
1121
|
}
|
1122
|
$DEP_BEHAVIOR = $2;
|
1123
|
|
1124
|
} elsif ($1 eq "dep_memory") {
|
1125
|
my $memory = dhmstos($2);
|
1126
|
if (!defined $memory) {
|
1127
|
close (CFG);
|
1128
|
return "cf error: bad value '$2' for dep_memory option (syntax: dep_memory = timeval), line $line_num";
|
1129
|
}
|
1130
|
$DEP_MEMORY = $memory;
|
1131
|
|
1132
|
} elsif ($1 eq "unack_summary") {
|
1133
|
if (defined $2) {
|
1134
|
if ($2 =~ /y(es)?/i) {
|
1135
|
$UNACK_SUMMARY = 1;
|
1136
|
} elsif ($2 =~ /n(o)?/i) {
|
1137
|
$UNACK_SUMMARY = 0;
|
1138
|
} elsif ($2 eq "0" || $2 eq "1") {
|
1139
|
$UNACK_SUMMARY = $2;
|
1140
|
} else {
|
1141
|
return "cf error: invalid unack_summary value '$2' (syntax: unack_summary [0|1|y|yes|n|no])";
|
1142
|
}
|
1143
|
} else {
|
1144
|
$UNACK_SUMMARY = 1;
|
1145
|
}
|
1146
|
|
1147
|
} elsif ($1 eq "syslog_facility") {
|
1148
|
$new_CF{"SYSLOG_FACILITY"} = $2;
|
1149
|
|
1150
|
} elsif ($1 eq "startupalerts_on_reset") {
|
1151
|
if ($2 =~ /^1|yes|true|on$/i) {
|
1152
|
$new_CF{"STARTUPALERTS_ON_RESET"} = 1;
|
1153
|
} else {
|
1154
|
$new_CF{"STARTUPALERTS_ON_RESET"} = 0;
|
1155
|
}
|
1156
|
|
1157
|
} elsif ($1 eq "monremote") {
|
1158
|
$new_CF{"MONREMOTE"} = $2;
|
1159
|
|
1160
|
} elsif ($1 eq "exclude_period") {
|
1161
|
if (inPeriod (time, $2) == -1)
|
1162
|
{
|
1163
|
close (CFG);
|
1164
|
return "cf error: malformed exclude_period '$2' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
|
1165
|
}
|
1166
|
$new_CF{"EXCLUDE_PERIOD"} = $2;
|
1167
|
} else {
|
1168
|
close (CFG);
|
1169
|
return "cf error: unknown variable '$1', line $line_num";
|
1170
|
}
|
1171
|
|
1172
|
next;
|
1173
|
}
|
1174
|
|
1175
|
#
|
1176
|
# end of record
|
1177
|
#
|
1178
|
if ($l eq "")
|
1179
|
{
|
1180
|
$ingroup = 0;
|
1181
|
$inalias = 0;
|
1182
|
$inwatch = 0;
|
1183
|
$period = 0;
|
1184
|
$inview = 0;
|
1185
|
|
1186
|
$curgroup = "";
|
1187
|
$curalias = "";
|
1188
|
$watchgroup = "";
|
1189
|
|
1190
|
$servnum = 0;
|
1191
|
next;
|
1192
|
}
|
1193
|
|
1194
|
#
|
1195
|
# hostgroup record
|
1196
|
#
|
1197
|
if ($l =~ /^hostgroup\s+([a-zA-Z0-9_.-]+)\s*(.*)/)
|
1198
|
{
|
1199
|
$curgroup = $1;
|
1200
|
|
1201
|
$ingroup = 1;
|
1202
|
$inview = 0;
|
1203
|
$inalias = 0;
|
1204
|
$inwatch = 0;
|
1205
|
$period = 0;
|
1206
|
|
1207
|
|
1208
|
$hosts = $2;
|
1209
|
%disabled = ();
|
1210
|
|
1211
|
foreach $h (grep (/^\*/, @{$groups{$curgroup}}))
|
1212
|
{
|
1213
|
# We have to make $i = $h because $h is actually
|
1214
|
# a pointer to %groups and will modify it.
|
1215
|
$i = $h;
|
1216
|
$i =~ s/^\*//;
|
1217
|
$disabled{$i} = 1;
|
1218
|
}
|
1219
|
|
1220
|
@{$new_groups{$curgroup}} = split(/\s+/, $hosts);
|
1221
|
|
1222
|
#
|
1223
|
# keep hosts which were previously disabled
|
1224
|
#
|
1225
|
for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
|
1226
|
{
|
1227
|
$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
|
1228
|
if ($disabled{$new_groups{$curgroup}[$i]});
|
1229
|
}
|
1230
|
|
1231
|
next;
|
1232
|
}
|
1233
|
|
1234
|
if ($ingroup)
|
1235
|
{
|
1236
|
push (@{$new_groups{$curgroup}}, split(/\s+/, $l));
|
1237
|
|
1238
|
for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
|
1239
|
{
|
1240
|
$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
|
1241
|
if ($disabled{$new_groups{$curgroup}[$i]});
|
1242
|
}
|
1243
|
|
1244
|
next;
|
1245
|
}
|
1246
|
|
1247
|
#
|
1248
|
# alias record
|
1249
|
#
|
1250
|
if ($l =~ /^alias\s+([a-zA-Z0-9_.-]+)\s*$/)
|
1251
|
{
|
1252
|
$inalias = 1;
|
1253
|
$inview = 0;
|
1254
|
$ingroup = 0;
|
1255
|
$inwatch = 0;
|
1256
|
$period = 0;
|
1257
|
|
1258
|
$curalias = $1;
|
1259
|
next;
|
1260
|
}
|
1261
|
|
1262
|
if ($inalias)
|
1263
|
{
|
1264
|
if ($l =~ /\A(.*)\Z/)
|
1265
|
{
|
1266
|
push (@{$new_alias{$curalias}}, $1);
|
1267
|
next;
|
1268
|
}
|
1269
|
}
|
1270
|
|
1271
|
#
|
1272
|
# view record
|
1273
|
#
|
1274
|
if ($l =~ /^view\s+([a-zA-Z0-9_.-]+)\s+(.*)$/)
|
1275
|
{
|
1276
|
$inview = 1;
|
1277
|
$inalias = 0;
|
1278
|
$ingroup = 0;
|
1279
|
$inwatch = 0;
|
1280
|
$period = 0;
|
1281
|
|
1282
|
$curview = $1;
|
1283
|
$new_views{$curview}={};
|
1284
|
|
1285
|
foreach (split(/\s+/, $2)) {
|
1286
|
$new_views{$curview}->{$_} = 1;
|
1287
|
};
|
1288
|
next;
|
1289
|
}
|
1290
|
|
1291
|
if ($inview)
|
1292
|
{
|
1293
|
foreach (split(/\s+/, $l)) {
|
1294
|
$new_views{$curview}->{$_} = 1;
|
1295
|
};
|
1296
|
next;
|
1297
|
}
|
1298
|
|
1299
|
#
|
1300
|
# watch record
|
1301
|
#
|
1302
|
if ($l =~ /^watch\s+([a-zA-Z0-9_.-]+)\s*/)
|
1303
|
{
|
1304
|
$watchgroup = $1;
|
1305
|
$inwatch = 1;
|
1306
|
$inview = 0;
|
1307
|
$inalias = 0;
|
1308
|
$ingroup = 0;
|
1309
|
$period = 0;
|
1310
|
|
1311
|
if (!defined ($new_groups{$watchgroup}))
|
1312
|
{
|
1313
|
#
|
1314
|
# This hostgroup doesn't exist yet, we'll create it and warn
|
1315
|
#
|
1316
|
@{$new_groups{$watchgroup}} = ($watchgroup);
|
1317
|
print STDERR "Warning: watch group $watchgroup defined with no corresponding hostgroup.\n";
|
1318
|
}
|
1319
|
if ($new_watch{$watchgroup})
|
1320
|
{
|
1321
|
close (CFG);
|
1322
|
return "cf error: watch '$watchgroup' already defined, line $line_num";
|
1323
|
}
|
1324
|
|
1325
|
$curgroup = "";
|
1326
|
$service = "";
|
1327
|
|
1328
|
next;
|
1329
|
}
|
1330
|
|
1331
|
if ($inwatch)
|
1332
|
{
|
1333
|
#
|
1334
|
# env variables
|
1335
|
#
|
1336
|
if ($l =~ /^([A-Z_][A-Z0-9_]*)=(.*)/)
|
1337
|
{
|
1338
|
if ($service eq "") {
|
1339
|
close (CFG);
|
1340
|
return "cf error: environment variable defined without a service, line $line_num";
|
1341
|
}
|
1342
|
$new_watch{$watchgroup}->{$service}->{"ENV"}->{$1} = $2;
|
1343
|
|
1344
|
next;
|
1345
|
}
|
1346
|
|
1347
|
#
|
1348
|
# non-env variables
|
1349
|
#
|
1350
|
else
|
1351
|
{
|
1352
|
$l =~ /^(\w+)\s*(.*)$/;
|
1353
|
$var = $1;
|
1354
|
$args = $2;
|
1355
|
}
|
1356
|
|
1357
|
#
|
1358
|
# service entry
|
1359
|
#
|
1360
|
if ($var eq "service")
|
1361
|
{
|
1362
|
$service = $args;
|
1363
|
|
1364
|
if ($service !~ /^[a-zA-Z0-9_.-]+$/) {
|
1365
|
close (CFG);
|
1366
|
return "cf error: invalid service tag '$args', line $line_num";
|
1367
|
}
|
1368
|
|
1369
|
elsif (exists $new_watch{$watchgroup}->{$service})
|
1370
|
{
|
1371
|
close (CFG);
|
1372
|
return "cf error: service $service already defined for watch group $watchgroup, line $line_num";
|
1373
|
}
|
1374
|
|
1375
|
$period = 0;
|
1376
|
$sref = \%{$new_watch{$watchgroup}->{$service}};
|
1377
|
$sref->{"service"} = $args;
|
1378
|
$sref->{"interval"} = undef;
|
1379
|
$sref->{"randskew"} = 0;
|
1380
|
$sref->{"redistribute"} = "";
|
1381
|
$sref->{"dep_behavior"} = $DEP_BEHAVIOR;
|
1382
|
$sref->{"dep_memory"} = $DEP_MEMORY;
|
1383
|
$sref->{"exclude_period"} = "";
|
1384
|
$sref->{"exclude_hosts"} = {};
|
1385
|
$sref->{"_op_status"} = $STAT_UNTESTED;
|
1386
|
$sref->{"_last_op_status"} = $STAT_UNTESTED;
|
1387
|
$sref->{"_ack"} = 0;
|
1388
|
$sref->{"_ack_comment"} = '';
|
1389
|
$sref->{"unack_summary"} = $UNACK_SUMMARY;
|
1390
|
$sref->{"_consec_failures"} = 0;
|
1391
|
$sref->{"_failure_count"} = 0 if (!defined($sref->{"_failure_count"}));
|
1392
|
$sref->{"_start_of_monitor"} = time if (!defined($sref->{"_start_of_monitor"}));
|
1393
|
$sref->{"_alert_count"} = 0 if (!defined($sref->{"_alert_count"}));
|
1394
|
$sref->{"_last_failure"} = 0 if (!defined($sref->{"_last_failure"}));
|
1395
|
$sref->{"_last_success"} = 0 if (!defined($sref->{"_last_success"}));
|
1396
|
$sref->{"_last_trap"} = 0 if (!defined($sref->{"_last_trap"}));
|
1397
|
$sref->{"_last_traphost"} = '' if (!defined($sref->{"_last_traphost"}));
|
1398
|
$sref->{"_exitval"} = "undef" if (!defined($sref->{"_exitval"}));
|
1399
|
$sref->{"_last_check"} = undef;
|
1400
|
#
|
1401
|
# -1 for _monitor_duration means no monitor has been run yet
|
1402
|
# so there is no duration data available
|
1403
|
#
|
1404
|
$sref->{"_monitor_duration"} = -1;
|
1405
|
$sref->{"_monitor_running"} = 0;
|
1406
|
$sref->{"_depend_status"} = undef;
|
1407
|
$sref->{"failure_interval"} = undef;
|
1408
|
$sref->{"_old_interval"} = undef;
|
1409
|
next;
|
1410
|
}
|
1411
|
|
1412
|
if ($service eq "")
|
1413
|
{
|
1414
|
close (CFG);
|
1415
|
return "cf error: need to specify service in watch record, line $line_num";
|
1416
|
}
|
1417
|
|
1418
|
|
1419
|
#
|
1420
|
# period definition
|
1421
|
#
|
1422
|
# for each service there can be one or more alert periods
|
1423
|
# this is stored as an array of hashes named
|
1424
|
# %{$watch{$watchgroup}->{$service}->{"periods"}}
|
1425
|
# each index for this hash is a unique tag for the period as
|
1426
|
# defined by the user or named after the period (such as
|
1427
|
# "wd {Mon-Fri} hr {7am-11pm}")
|
1428
|
#
|
1429
|
# the value of the hash is an array containing the list of alert commands
|
1430
|
# and arguments, so
|
1431
|
#
|
1432
|
# @alerts = @{$watch{$watchgroup}->{$service}->{"periods"}->{"TAG"}}
|
1433
|
#
|
1434
|
if ($var eq "period")
|
1435
|
{
|
1436
|
$period = 1;
|
1437
|
|
1438
|
my $periodstr;
|
1439
|
|
1440
|
if ($args =~ /^([a-z_]\w*) \s* : \s* (.*)$/ix)
|
1441
|
{
|
1442
|
$periodstr = $1;
|
1443
|
$args = $2;
|
1444
|
}
|
1445
|
|
1446
|
else
|
1447
|
{
|
1448
|
$periodstr = $args;
|
1449
|
}
|
1450
|
|
1451
|
if (exists $sref->{"periods"}->{$periodstr})
|
1452
|
{
|
1453
|
close (CFG);
|
1454
|
return "cf error: period '$periodstr' already defined for watch group $watchgroup service $service, line $line_num";
|
1455
|
}
|
1456
|
|
1457
|
$pref = \%{$sref->{"periods"}->{$periodstr}};
|
1458
|
|
1459
|
if (inPeriod (time, $args) == -1)
|
1460
|
{
|
1461
|
close (CFG);
|
1462
|
return "cf error: malformed period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
|
1463
|
}
|
1464
|
|
1465
|
$pref->{"period"} = $args;
|
1466
|
$pref->{"alertevery"} = 0;
|
1467
|
$pref->{"numalerts"} = 0;
|
1468
|
$pref->{"_alert_sent"} = 0;
|
1469
|
$pref->{"no_comp_alerts"} = 0;
|
1470
|
$pref->{"_no_comp_alerts_upalert_sent"} = 0;
|
1471
|
@{$pref->{"alerts"}} = ();
|
1472
|
@{$pref->{"upalerts"}} = ();
|
1473
|
@{$pref->{"ackalerts"}} = ();
|
1474
|
@{$pref->{"disablealerts"}} = ();
|
1475
|
@{$pref->{"startupalerts"}} = ();
|
1476
|
next;
|
1477
|
}
|
1478
|
|
1479
|
#
|
1480
|
# period variables
|
1481
|
#
|
1482
|
if ($period)
|
1483
|
{
|
1484
|
if ($var eq "alert")
|
1485
|
{
|
1486
|
push @{$pref->{"alerts"}}, $args;
|
1487
|
}
|
1488
|
|
1489
|
elsif ($var eq "ackalert")
|
1490
|
{
|
1491
|
push @{$pref->{"ackalerts"}}, $args;
|
1492
|
}
|
1493
|
|
1494
|
elsif ($var eq "disablealert")
|
1495
|
{
|
1496
|
push @{$pref->{"disablealerts"}}, $args;
|
1497
|
}
|
1498
|
|
1499
|
elsif ($var eq "upalert")
|
1500
|
{
|
1501
|
$sref->{"_upalert"} = 1;
|
1502
|
push @{$pref->{"upalerts"}}, $args;
|
1503
|
}
|
1504
|
|
1505
|
elsif ($var eq "startupalert")
|
1506
|
{
|
1507
|
push @{$pref->{"startupalerts"}}, $args;
|
1508
|
}
|
1509
|
|
1510
|
elsif ($var eq "alertevery")
|
1511
|
{
|
1512
|
$pref->{"_observe_detail"} = 0;
|
1513
|
$pref->{"_alertevery_strict"} = 0;
|
1514
|
$pref->{"_ignore_summary"} = 0;
|
1515
|
|
1516
|
if ($args =~ /(\S+) \s+ observe_detail \s*$/ix)
|
1517
|
{
|
1518
|
$pref->{"_observe_detail"} = 1;
|
1519
|
$args = $1;
|
1520
|
}
|
1521
|
|
1522
|
elsif ($args =~ /(\S+) \s+ ignore_summary \s*$/ix)
|
1523
|
{
|
1524
|
$pref->{"_ignore_summary"} = 1;
|
1525
|
$args = $1;
|
1526
|
}
|
1527
|
|
1528
|
#
|
1529
|
# for backwards-compatibility with <= 0.38.21
|
1530
|
#
|
1531
|
elsif ($args =~ /(\S+) \s+ summary/ix)
|
1532
|
{
|
1533
|
$args = $1;
|
1534
|
}
|
1535
|
|
1536
|
#
|
1537
|
# strict
|
1538
|
#
|
1539
|
elsif ($args =~ /(\S+) \s+ strict \s*$/ix)
|
1540
|
{
|
1541
|
$pref->{"_alertevery_strict"} = 1;
|
1542
|
$args = $1;
|
1543
|
}
|
1544
|
|
1545
|
if (!($args = dhmstos ($args))) {
|
1546
|
close (CFG);
|
1547
|
return "cf error: invalid time interval '$args' (syntax: alertevery {positive number}{smhd} [ strict | observe_detail | ignore_summary ]), line $line_num";
|
1548
|
}
|
1549
|
|
1550
|
$pref->{"alertevery"} = $args;
|
1551
|
next;
|
1552
|
}
|
1553
|
|
1554
|
elsif ($var eq "alertafter")
|
1555
|
{
|
1556
|
my ($p1, $p2);
|
1557
|
|
1558
|
#
|
1559
|
# alertafter NUM
|
1560
|
#
|
1561
|
if ($args =~ /^(\d+)$/)
|
1562
|
{
|
1563
|
$p1 = $1;
|
1564
|
$pref->{"alertafter_consec"} = $p1;
|
1565
|
}
|
1566
|
|
1567
|
#
|
1568
|
# alertafter timeval
|
1569
|
#
|
1570
|
elsif ($args =~ /^(\d+[hms])$/)
|
1571
|
{
|
1572
|
$p1 = $1;
|
1573
|
if (!($p1 = dhmstos ($p1)))
|
1574
|
{
|
1575
|
close (CFG);
|
1576
|
return "cf error: invalid time interval '$args' (syntax: alertafter = [{positive integer}] [{positive number}{smhd}]), line $line_num";
|
1577
|
}
|
1578
|
|
1579
|
$pref->{"alertafterival"} = $p1;
|
1580
|
$pref->{"_1stfailtime"} = 0;
|
1581
|
}
|
1582
|
|
1583
|
#
|
1584
|
# alertafter NUM timeval
|
1585
|
#
|
1586
|
elsif ($args =~ /(\d+)\s+(\d+[hms])$/)
|
1587
|
{
|
1588
|
($p1, $p2) = ($1, $2);
|
1589
|
if (($p1 - 1) * $sref->{"interval"} >= dhmstos($p2))
|
1590
|
{
|
1591
|
close (CFG);
|
1592
|
return "cf error: interval & alertafter not sensible. No alerts can be generated with those parameters, line $line_num";
|
1593
|
}
|
1594
|
$pref->{"alertafter"} = $p1;
|
1595
|
$pref->{"alertafterival"} = dhmstos ($p2);
|
1596
|
|
1597
|
$pref->{"_1stfailtime"} = 0;
|
1598
|
$pref->{"_failcount"} = 0;
|
1599
|
}
|
1600
|
|
1601
|
else
|
1602
|
{
|
1603
|
close (CFG);
|
1604
|
return "cf error: invalid interval specification '$args', line $line_num";
|
1605
|
}
|
1606
|
}
|
1607
|
|
1608
|
elsif ($var eq "upalertafter")
|
1609
|
{
|
1610
|
if (!($args = dhmstos ($args))) {
|
1611
|
close (CFG);
|
1612
|
return "cf error: invalid upalertafter specification '$args' (syntax: upalertafter = {positive number}{smhd}), line $line_num";
|
1613
|
}
|
1614
|
|
1615
|
$pref->{"upalertafter"} = $args;
|
1616
|
}
|
1617
|
|
1618
|
elsif ($var eq "numalerts")
|
1619
|
{
|
1620
|
if ($args !~ /^\d+$/) {
|
1621
|
close (CFG);
|
1622
|
return "cf error: -numeric arg '$args' (syntax: numalerts = {positive integer}, line $line_num";
|
1623
|
}
|
1624
|
$pref->{"numalerts"} = $args;
|
1625
|
next;
|
1626
|
}
|
1627
|
|
1628
|
elsif ($var eq "no_comp_alerts")
|
1629
|
{
|
1630
|
$pref->{"no_comp_alerts"} = 1;
|
1631
|
next;
|
1632
|
}
|
1633
|
|
1634
|
elsif ($var eq "alerts_dont_count")
|
1635
|
{
|
1636
|
$pref->{"alerts_dont_count"} = 1;
|
1637
|
next;
|
1638
|
}
|
1639
|
|
1640
|
elsif ($var eq 'alertexitrange') {
|
1641
|
if ($args !~ /^\s*(\d+|\d+-\d+)\s*$/) {
|
1642
|
close (CFG);
|
1643
|
return "cf error: invalid exit code range '$args', line $line_num";
|
1644
|
}
|
1645
|
$pref->{"alertexitrange"} = $args;
|
1646
|
}
|
1647
|
|
1648
|
else
|
1649
|
{
|
1650
|
close (CFG);
|
1651
|
return "cf error: unknown syntax [$l], line $line_num";
|
1652
|
}
|
1653
|
|
1654
|
}
|
1655
|
|
1656
|
#
|
1657
|
# non-period variables
|
1658
|
#
|
1659
|
elsif (!$period)
|
1660
|
{
|
1661
|
if ($var eq "interval")
|
1662
|
{
|
1663
|
if (!($args = dhmstos ($args))) {
|
1664
|
close (CFG);
|
1665
|
return "cf error: invalid time interval '$args' (syntax: interval = {positive number}{smhd}), line $line_num";
|
1666
|
}
|
1667
|
}
|
1668
|
|
1669
|
elsif ($var eq "failure_interval")
|
1670
|
{
|
1671
|
if (!($args = dhmstos ($args))) {
|
1672
|
close (CFG);
|
1673
|
return "cf error: invalid interval '$args' (syntax: failure_interval = {positive number}{smhd}), line $line_num";
|
1674
|
}
|
1675
|
}
|
1676
|
|
1677
|
elsif ($var eq "monitor")
|
1678
|
{
|
1679
|
# valid
|
1680
|
}
|
1681
|
|
1682
|
elsif ($var eq "redistribute")
|
1683
|
{
|
1684
|
# valid
|
1685
|
}
|
1686
|
|
1687
|
elsif ($var eq "allow_empty_group")
|
1688
|
{
|
1689
|
# valid
|
1690
|
}
|
1691
|
|
1692
|
elsif ($var eq "description")
|
1693
|
{
|
1694
|
# valid
|
1695
|
}
|
1696
|
|
1697
|
elsif ($var eq "unack_summary")
|
1698
|
{
|
1699
|
if (defined $args) {
|
1700
|
if ($args =~ /y(es)?/i) {
|
1701
|
$args = 1;
|
1702
|
} elsif ($args =~ /n(o)?/i) {
|
1703
|
$args = 0;
|
1704
|
}
|
1705
|
if ($args eq "0" || $args eq "1") {
|
1706
|
$sref->{"unack_summary"} = $args;
|
1707
|
} else {
|
1708
|
return "cf error: invalid unack_summary value '$args' (syntax: unack_summary [0|1|y|yes|n|no])";
|
1709
|
}
|
1710
|
} else {
|
1711
|
$sref->{"unack_summary"} = 1;
|
1712
|
}
|
1713
|
next;
|
1714
|
}
|
1715
|
|
1716
|
elsif ($var eq "traptimeout")
|
1717
|
{
|
1718
|
if (!($args = dhmstos ($args))) {
|
1719
|
close (CFG);
|
1720
|
return "cf error: invalid traptimeout interval '$args' (syntax: traptimeout = {positive number}{smhd}), line $line_num";
|
1721
|
}
|
1722
|
$sref->{"_trap_timer"} = $args;
|
1723
|
}
|
1724
|
|
1725
|
elsif ($var eq "trapduration")
|
1726
|
{
|
1727
|
if (!($args = dhmstos ($args))) {
|
1728
|
close (CFG);
|
1729
|
return "cf error: invalid trapduration interval '$args' (syntax: trapduration = {positive number}{smhd}), line $line_num";
|
1730
|
}
|
1731
|
}
|
1732
|
|
1733
|
elsif ($var eq "randskew")
|
1734
|
{
|
1735
|
if (!($args = dhmstos ($args))) {
|
1736
|
close (CFG);
|
1737
|
return "cf error: invalid randskew time interval '$args' (syntax: randskew = {positive number}{smhd}), line $line_num";
|
1738
|
}
|
1739
|
}
|
1740
|
|
1741
|
elsif ($var eq "dep_behavior")
|
1742
|
{
|
1743
|
if ($args ne "m" && $args ne "a" && $args ne "hm")
|
1744
|
{
|
1745
|
close (CFG);
|
1746
|
return "cf error: unknown dependency behavior '$args' (syntax: dep_behavior = {m|a}), line $line_num";
|
1747
|
}
|
1748
|
}
|
1749
|
|
1750
|
elsif ($var eq "dep_memory")
|
1751
|
{
|
1752
|
my $timeval = dhmstos($args);
|
1753
|
if (!$timeval) {
|
1754
|
close (CFG);
|
1755
|
return "cf error: bad value '$args' for dep_memory option (syntax: dep_memory = timeval), line $line_num";
|
1756
|
}
|
1757
|
$args = $timeval;
|
1758
|
}
|
1759
|
|
1760
|
elsif ($var eq "depend")
|
1761
|
{
|
1762
|
$args =~ s/SELF:/$watchgroup:/g;
|
1763
|
}
|
1764
|
|
1765
|
elsif ($var eq "alertdepend")
|
1766
|
{
|
1767
|
$args =~ s/SELF:/$watchgroup:/g;
|
1768
|
}
|
1769
|
|
1770
|
elsif ($var eq "monitordepend")
|
1771
|
{
|
1772
|
$args =~ s/SELF:/$watchgroup:/g;
|
1773
|
}
|
1774
|
|
1775
|
elsif ($var eq "hostdepend")
|
1776
|
{
|
1777
|
$args =~ s/SELF:/$watchgroup:/g;
|
1778
|
}
|
1779
|
|
1780
|
elsif ($var eq "exclude_hosts")
|
1781
|
{
|
1782
|
my $ex = {};
|
1783
|
foreach my $h (split (/\s+/, $args))
|
1784
|
{
|
1785
|
$ex->{$h} = 1;
|
1786
|
}
|
1787
|
$args = $ex;
|
1788
|
}
|
1789
|
|
1790
|
elsif ($var eq "exclude_period")
|
1791
|
{
|
1792
|
if (inPeriod (time, $args) == -1)
|
1793
|
{
|
1794
|
close (CFG);
|
1795
|
return "cf error: malformed exclude_period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
|
1796
|
}
|
1797
|
}
|
1798
|
|
1799
|
else
|
1800
|
{
|
1801
|
close (CFG);
|
1802
|
return "cf error: unknown syntax [$l], line $line_num";
|
1803
|
}
|
1804
|
|
1805
|
$sref->{$var} = $args;
|
1806
|
}
|
1807
|
|
1808
|
else
|
1809
|
{
|
1810
|
close (CFG);
|
1811
|
return "cf error: unknown syntax outside of period section [$l], line $line_num";
|
1812
|
}
|
1813
|
}
|
1814
|
|
1815
|
next;
|
1816
|
}
|
1817
|
|
1818
|
close (CFG) || return "Could not open pipe to m4 (check that m4 is properly installed and in your PATH): $!";
|
1819
|
|
1820
|
#
|
1821
|
# Go through each defined hostgroup and check that there is a
|
1822
|
# watch associated with that hostgroup record.
|
1823
|
#
|
1824
|
# hostgroups without associated watches are not a violation of
|
1825
|
# mon config syntax, but it's usually not what you want.
|
1826
|
#
|
1827
|
for (keys(%new_watch)) { $is_watch{$_} = 1 };
|
1828
|
foreach $watchgroup ( keys (%new_groups) ) {
|
1829
|
print STDERR "Warning: hostgroup $watchgroup has no watch assigned to it!\n" unless $is_watch{$watchgroup};
|
1830
|
}
|
1831
|
|
1832
|
#
|
1833
|
# no errors, commit new config if $commit was specified
|
1834
|
#
|
1835
|
return "" unless $commit;
|
1836
|
%views = %new_views;
|
1837
|
%alias = %new_alias;
|
1838
|
%groups = %new_groups;
|
1839
|
%watch = %new_watch;
|
1840
|
%CF = %new_CF;
|
1841
|
|
1842
|
"";
|
1843
|
}
|
1844
|
|
1845
|
|
1846
|
#
|
1847
|
# convert a string like "20m" into seconds
|
1848
|
#
|
1849
|
sub dhmstos {
    #
    # Convert a duration such as "20m", "1.5h" or " 3D " into seconds.
    #
    # Argument: a string made of a non-negative decimal number directly
    #   followed by one unit letter (d, h, m or s, in either case),
    #   optionally surrounded by whitespace.
    # Returns: the duration in seconds, or undef when the string does
    #   not have that form. For the "s" unit the captured digits are
    #   handed back untouched, so "0.0s" yields the string "0.0".
    #
    my ($str) = @_;

    # multipliers applied one after another to the count, per unit letter
    my %factors_for = (
        "s" => [],
        "m" => [60],
        "h" => [60, 60],
        "d" => [60, 60, 24],
    );

    my $spec = lc ($str);

    return undef unless $spec =~ /^\s*(\d+(?:\.\d+)?)([dhms])\s*$/i;

    my ($secs, $unit) = ($1, $2);
    foreach my $factor (@{$factors_for{$unit}}) {
        $secs *= $factor;
    }

    return $secs;
}
|
1870
|
|
1871
|
|
1872
|
#
|
1873
|
# reset the state of the server on SIGHUP, and reread config
|
1874
|
# file.
|
1875
|
#
|
1876
|
sub reset_server {
    #
    # Reset the running server and re-read the configuration file.
    #
    # Argument: $keepstate - when true, save_state ("all") is called
    #   before the configuration is re-read and load_state ("all") after.
    # Returns: 1 on success; undef when read_cf reports an error (the
    #   error text is sent to syslog first).
    #
    my ($keepstate) = @_;

    #
    # signal (15) and reap every child that is still tracked
    #
    for my $pid (keys %runningpid) {
        kill 15, $pid;
        waitpid ($pid, 0);
        syslog ('info', "reset killed child $pid, exit status $?");
        remove_proc ($pid);
    }

    $procs = 0;

    if ($keepstate) {
        save_state ("all");
    }

    syslog ('info', "resetting, and re-reading configuration $CF{CF}");

    # second argument 1 asks read_cf to commit the parsed configuration
    my $err = read_cf ($CF{"CF"}, 1);
    if ($err ne "") {
        syslog ('err', "error reading config file: $err");
        return undef;
    }

    normalize_paths ();
    gen_scriptdir_hash ();

    $lasttm = time;	# the last time(2) the loop started
    $fdset_rbits = $fdset_ebits = '';

    set_last_test ();

    if ($CF{"RANDSTART"}) {
        randomize_startdelay ();
    }

    if ($keepstate) {
        load_state ("all");
    }

    init_dtlog () if ($CF{"DTLOGGING"});

    readhistoricfile ();

    do_startup_alerts () if ($CF{"STARTUPALERTS_ON_RESET"});

    return 1;
}
|
1918
|
|
1919
|
|
1920
|
sub init_dtlog {
    #
    # Append a "downtime log start" header to the downtime log file.
    #
    # Does nothing unless $CF{"DTLOGGING"} is true. If the file named by
    # $CF{"DTLOGFILE"} cannot be opened for appending, the error is sent
    # to syslog and $CF{"DTLOGGING"} is switched off; otherwise it is set
    # to 1, the header (including the current time) is written and the
    # file is closed again.
    #
    my $t = time;

    return if (!$CF{"DTLOGGING"});

    #
    # Three-argument open: the file name is taken literally, so no
    # character in the configured path can be parsed as part of the
    # open mode. The name is interpolated into a string on purpose: an
    # undefined DTLOGFILE then becomes "" and the open fails, instead
    # of hitting open's "undef means anonymous temporary file" case.
    #
    if (!open (DTLOG, ">>", "$CF{DTLOGFILE}")) {
        syslog ('err', "could not append to $CF{DTLOGFILE}: $!");
        $CF{"DTLOGGING"} = 0;
    } else {
        $CF{"DTLOGGING"} = 1;
        print DTLOG <<EOF;
#
# downtime log start $t
# time back up, group, service, first failure, downtime, interval, summary
#
EOF
        close (DTLOG);
    }
}
|
1939
|
|
1940
|
|
1941
|
#
|
1942
|
# remove a process from our state
|
1943
|
#
|
1944
|
sub remove_proc {
    #
    # Forget a child process.
    #
    # Argument: $pid - process id used as a key into %runningpid.
    # Does nothing when that pid has no defined entry. Otherwise the
    # entry's value is used as the key under which the child's
    # filehandle (%fhandles) and running marker (%running) are stored:
    # the handle's bit in $fdset_rbits is cleared, the handle is closed,
    # all three entries are deleted and $procs is decremented.
    #
    my ($pid) = @_;

    my $tag = $runningpid{$pid};
    return unless defined $tag;

    my $fh = $fhandles{$tag};

    # clear the read bit while fileno is still valid, then close
    vec ($fdset_rbits, fileno ($fh), 1) = 0;
    close ($fh);

    delete $fhandles{$tag};
    delete $running{$tag};
    delete $runningpid{$pid};

    $procs--;
}
|
1956
|
|
1957
|
|
1958
|
#
|
1959
|
# exit on SIGTERM
|
1960
|
#
|
1961
|
sub handle_sigterm {
    #
    # Log that a TERM signal was caught and end the process with exit
    # status 1. (Presumably installed as the SIGTERM handler; the
    # installation is not part of this routine.)
    #
    my $exit_status = 1;

    syslog ("info", "caught TERM signal, exiting");

    exit ($exit_status);
}
|
1965
|
|
1966
|
|
1967
|
#
|
1968
|
# set O_NONBLOCK and FD_CLOEXEC on the given filehandle
|
1969
|
#
|
1970
|
sub configure_filehandle {
    #
    # Switch a filehandle to non-blocking mode and mark it
    # close-on-exec.
    #
    # Argument: $fh - the filehandle (or glob) to adjust.
    # Returns: 1 when all four fcntl calls succeed; as soon as one of
    #   them fails the routine returns nothing (undef / empty list).
    #
    my ($fh) = @_;

    #
    # file status flags: add O_NONBLOCK
    #
    my $scratch = '';
    my $status = fcntl ($fh, F_GETFL, $scratch);
    return unless $status;

    $status |= O_NONBLOCK;
    return unless fcntl ($fh, F_SETFL, $status);

    #
    # file descriptor flags: add FD_CLOEXEC
    #
    my $fdflags = fcntl ($fh, F_GETFD, 0);
    return unless $fdflags;

    $fdflags |= FD_CLOEXEC;
    return unless fcntl ($fh, F_SETFD, $fdflags);

    return 1;
}
|
1985
|
|
1986
|
|
1987
|
#
|
1988
|
# setup server
|
1989
|
#
|
1990
|
sub setup_server {
    #
    # Create the two server sockets:
    #
    #   SERVER      TCP stream socket for clients, bound to
    #               $CF{"SERVPORT"} on the address named by
    #               $CF{"SERVERBIND"} (INADDR_ANY when that is unset),
    #               with SO_REUSEADDR set, and put into listen mode
    #   TRAPSERVER  UDP datagram socket for traps, bound to
    #               $CF{"TRAPPORT"} on the address named by
    #               $CF{"TRAPBIND"} (INADDR_ANY when that is unset)
    #
    # Both handles are passed through configure_filehandle. Every
    # failure is reported through die_die.
    #
    my ($tcpproto, $udpproto);

    if (!defined ($tcpproto = getprotobyname ('tcp')))
    {
        die_die ("err", "could not get protocol for tcp");
    }

    if (!defined ($udpproto = getprotobyname ('udp')))
    {
        die_die ("err", "could not get protocol for udp");
    }

    #
    # client server, such as moncmd
    #
    my $bindaddr;
    if (defined $CF{"SERVERBIND"})
    {
        # Perl's gethost* functions report the resolver's h_errno in $?
        if (!($bindaddr = gethostbyname ($CF{"SERVERBIND"})))
        {
            die_die ("err", "error returned by gethostbyname for serverbind: $?");
        }
    }

    else
    {
        $bindaddr = INADDR_ANY;
    }

    socket (SERVER, PF_INET, SOCK_STREAM, $tcpproto) ||
        die_die ("err", "could not create TCP socket: $!");

    setsockopt (SERVER, SOL_SOCKET, SO_REUSEADDR, pack ("l", 1)) ||
        die_die ("err", "could not setsockopt: $!");

    bind (SERVER, sockaddr_in ($CF{"SERVPORT"}, $bindaddr)) ||
        die_die ("err", "could not bind TCP server port $CF{'SERVPORT'}: $!");

    # a socket that cannot listen would never accept a client
    listen (SERVER, SOMAXCONN) ||
        die_die ("err", "could not listen on TCP server port $CF{'SERVPORT'}: $!");

    configure_filehandle (*SERVER) ||
        die_die ("err", "could not configure TCP server port: $!");

    #
    # remote monitor traps
    #
    if (defined $CF{"TRAPBIND"})
    {
        if (!($bindaddr = gethostbyname ($CF{"TRAPBIND"})))
        {
            die_die ("err", "error returned by gethostbyname for trapbind: $?");
        }
    }

    else
    {
        $bindaddr = INADDR_ANY;
    }

    socket (TRAPSERVER, PF_INET, SOCK_DGRAM, $udpproto) ||
        die_die ("err", "could not create UDP socket: $!");
    bind (TRAPSERVER, sockaddr_in ($CF{"TRAPPORT"}, $bindaddr)) ||
        die_die ("err", "could not bind UDP server port: $!");
    configure_filehandle (*TRAPSERVER) ||
        die_die ("err", "could not configure UDP trap port: $!");
}
|
2057
|
|
2058
|
|
2059
|
#
|
2060
|
# set up a client connection if necessary
|
2061
|
#
|
2062
|
sub client_accept {
    #
    # Accept one connection on the SERVER socket and register it as a
    # client.
    #
    # The new handle is passed through configure_filehandle, and the
    # peer's IP address is matched against the whitespace-separated
    # patterns in $CF{"CLIENTALLOW"}. A connection whose address matches
    # no pattern is logged and closed. An accepted client gets autoflush
    # switched on and an entry in %clients keyed by its file number,
    # and $numclients is incremented.
    #
    my ($sock, $port, $addr);

    my $CLIENT = FileHandle->new;

    if (!defined ($sock = accept ($CLIENT, SERVER))) {
        syslog ('err', "accept returned error: $!");
        return;
    }

    debug(1, "accepted client $CLIENT\n");
    my $fno = fileno ($CLIENT);

    #
    # set socket to nonblocking
    #
    if (!configure_filehandle ($CLIENT)) {
        syslog ("err", "could not configure for client: $!");
        close ($CLIENT);
        return;
    }

    ($port, $addr) = unpack_sockaddr_in ($sock);
    my $clientip = inet_ntoa($addr);

    syslog ('info', "client connection from $clientip:$port");

    my @clientregex = split(' ', $CF{"CLIENTALLOW"});
    my $ipok= 0;

    foreach my $ippattern (@clientregex)
    {
        #
        # change all periods, except those preceded by [ or \, into \.
        #
        $ippattern=~ s/([^[\\])\./$1\\./g;

        if ($clientip =~ /^${ippattern}$/)
        {
            $ipok= 1;
            last;
        }
    }

    if (! $ipok)
    {
        syslog('notice', "closing unwanted client: $clientip");
        close($CLIENT);
        return;
    }

    #
    # turn on autoflush for the client, then give the default output
    # handle back to whatever was selected before
    #
    my $previous_fh = select ($CLIENT);
    $|=1;
    select ($previous_fh);

    $clients{$fno}->{"host"} = $clientip;
    $clients{$fno}->{"fhandle"} = $CLIENT;
    $clients{$fno}->{"user"} = undef;			# username if authenticated
    $clients{$fno}->{"timeout"} = $CF{"CLIENT_TIMEOUT"};
    $clients{$fno}->{"last_read"} = time;		# last time data was read
    $clients{$fno}->{"buf"} = '';
    $numclients++;
}
|
2125
|
|
2126
|
|
2127
|
#
|
2128
|
# do all pending client commands
|
2129
|
#
|
2130
|
sub client_dopending {
    #
    # For every entry in %clients whose input buffer holds at least one
    # complete line (text followed by CR and/or LF characters), remove
    # the first such line from the buffer and hand it, without its line
    # terminator, to client_command. At most one command per client is
    # processed per call.
    #
    foreach my $cl (keys %clients) {
        next unless $clients{$cl}->{"buf"} =~ /^([^\r\n]*)[\r\n]+/s;

        # copy the capture before any other pattern match can replace it
        my $cmd = $1;

        $clients{$cl}->{"buf"} =~ s/^[^\r\n]*[\r\n]+//s;
        client_command ($cl, $cmd);
    }
}
|
2142
|
|
2143
|
|
2144
|
#
|
2145
|
# close a client connection
|
2146
|
#
|
2147
|
sub client_close {
    #
    # Close a client connection and drop it from the bookkeeping.
    #
    # Arguments: $cl     - key of the client in %clients
    #            $reason - optional text; when defined, the close is
    #                      logged to syslog together with this reason
    # Dies when the client has no defined "fhandle" entry. Otherwise
    # the handle is closed, the %clients entry is deleted, bit $cl of
    # $iovec is cleared and $numclients is decremented.
    #
    my ($cl, $reason) = @_;

    if (defined $reason) {
        syslog ('info', "closing client $cl: $reason");
    }

    my $fh = $clients{$cl}->{"fhandle"};
    die unless defined ($fh);

    close ($fh);
    delete $clients{$cl};
    vec ($iovec, $cl, 1) = 0;

    $numclients--;
}
|
2157
|
|
2158
|
|
2159
|
#
# Handle one command line received from a connected client
#
# Arguments:
#   $cl - key of the client's entry in %clients
#   $l  - the command line sent by the client
#
# The line is checked against the list of known commands and then
# dispatched. All replies are written to the client's socket with
# sock_write(); a reply ends with a "220 ..." line on success and with
# a "5xx ..." line on failure.
#
# Most commands are guarded by check_auth(). When check_auth() returns
# false the guarded branch is skipped and the command falls through to
# the final "520 command could not be executed, unknown command" reply.
#
# If a disable/enable command changed anything, the "disabled" state is
# saved before returning.
#
sub client_command {
    my ($cl, $l) = @_;
    my ($cmd, $args, $group, $service, $stchanged);
    my ($var, $value, @l, $fh);
    my ($user, $pass, @argsList);
    my ($authtype, @authtypes);
    my $is_auth = 0;    # flag for multiple auth types

    # login lines carry the password, so they are never logged.
    # The text is passed as an argument to a "%s" format so that "%"
    # sequences sent by the client are not interpreted by syslog.
    syslog ('info', "%s", "client command \"$l\"")
        if ($l !~ /^\s*login/i);

    $fh = $clients{$cl}->{"fhandle"};

    if ($l !~ /^(dump|login|disable|enable|quit|list|set|get|setview|getview|
                 stop|start|loadstate|savestate|reset|clear|checkauth|
                 reload|term|test|servertime|ack|version|protid)(\s+(.*))?$/ix) {
        sock_write ($fh, "520 invalid command\n");
        return;
    }
    ($cmd, $args) = ("\L$1", $3);

    $stchanged = 0;

    # only emitted in debug mode, and never for login (password in $args)
    debug (1, "client command $cmd\nclient args $args\n")
        if ($cmd ne "login");

    #
    # quit command
    #
    if ($cmd eq "quit") {
        sock_write ($fh, "220 quitting\n");
        client_close ($cl);

    #
    # dump the watch table to STDERR (debug mode only)
    #
    } elsif ($opt{"d"} && $cmd eq "dump") {
        print STDERR Dumper (\%watch), "\n\n";

    #
    # protocol identification
    #
    } elsif ($cmd eq "protid") {
        if ($args != int ($PROT_VERSION)) {
            sock_write ($fh, "520 protocol mismatch\n");
        } else {
            sock_write ($fh, "220 protocol match\n");
        }

    #
    # login
    #
    } elsif ($cmd eq "login") {
        ($user, $pass) = split (/\s+/, $args, 2);
        @authtypes = split (' ', $CF{"AUTHTYPE"});

        # Check each form of authentication in order, and stop checking
        # as soon as we get a positive authentication result.
        foreach $authtype (@authtypes) {
            if (defined auth ($authtype, $user, $pass, $clients{$cl}->{"host"})) {
                $is_auth = 1;
                last;
            }
        }

        if ($is_auth != 1) {
            sock_write ($fh, "530 login unsuccessful\n");
        } else {
            $clients{$cl}->{"user"} = $user;
            syslog ("info", "%s", "authenticated $user");
            sock_write ($fh, "220 login accepted\n");
        }

    #
    # reset
    #
    } elsif ($cmd eq "reset" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($keepstate);

        if ($args =~ /stopped/i) {
            $STOPPED = 1;
            $STOPPED_TIME = time;
        }

        if ($args =~ /keepstate/) {
            $keepstate = 1;
        }

        if (reset_server ($keepstate)) {
            sock_write ($fh, "220 reset PID $$\@$HOSTNAME\n");
        } else {
            sock_write ($fh, "520 reset PID $$\@$HOSTNAME failed, error in config file\n");
        }

    #
    # reload
    #
    } elsif ($cmd eq "reload" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if (!defined reload (split (/\s+/, $args))) {
            sock_write ($fh, "520 unknown reload command\n");
        } else {
            sock_write ($fh, "220 reload completed\n");
        }

    #
    # clear
    #
    } elsif ($cmd eq "clear" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /^timers \s+ ([a-zA-Z0-9_.-]+) \s+ ([a-zA-Z0-9_.-]+)/ix) {
            # copy the captures before anything else can clobber them
            my ($tgroup, $tservice) = ($1, $2);

            if (!defined $watch{$tgroup}->{$tservice}) {
                sock_write ($fh, "520 unknown group\n");
            } else {
                clear_timers ($tgroup, $tservice);
                sock_write ($fh, "220 clear timers completed\n");
            }

        } else {
            # no "next" here: we are not inside a loop, and leaving the
            # sub that way would skip the bookkeeping at the end
            sock_write ($fh, "520 unknown clear command\n");
        }

    #
    # test
    #
    } elsif ($cmd eq "test" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($tcmd, $targs) = split (/\s+/, $args, 2);

        #
        # test monitor
        #
        if ($tcmd eq "monitor") {
            my ($group, $service) = split (/\s+/, $targs);

            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "520 $group $service not defined\n");
            } else {
                # zero the timers so the monitor is due immediately
                $watch{$group}->{$service}->{"_timer"} = 0;
                $watch{$group}->{$service}->{"_next_check"} = 0;
                mysystem("$CF{MONREMOTE} test $group $service") if ($CF{MONREMOTE});
                sock_write ($fh, "220 test monitor completed\n");
            }

        #
        # test alert
        #
        } elsif ($tcmd =~ /^(?:alert|startupalert|upalert|ackalert|disablealert)$/) {
            my ($group, $service, $retval, $period) = split (/\s+/, $targs, 4);

            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "520 $group $service not defined\n");

            } elsif (!defined $watch{$group}->{$service}->{"periods"}->{$period}) {
                sock_write ($fh, "520 period not defined\n");

            } else {
                my $pref = $watch{$group}->{$service}->{"periods"}->{$period};
                my $f = 0;
                my $a;

                if ($tcmd eq "alert") {
                    $a = $pref->{"alerts"};
                } elsif ($tcmd eq "startupalert") {
                    $f = $FL_STARTUPALERT;
                    $a = $pref->{"startupalerts"};
                } elsif ($tcmd eq "upalert") {
                    $f = $FL_UPALERT;
                    $a = $pref->{"upalerts"};
                } elsif ($tcmd eq "ackalert") {
                    $f = $FL_ACKALERT;
                    $a = $pref->{"ackalerts"};
                } elsif ($tcmd eq "disablealert") {
                    $f = $FL_DISABLEALERT;
                    $a = $pref->{"disablealerts"};
                }

                for (@{$a}) {
                    my ($alert, $alert_args) = split (/\s+/, $_, 2);

                    # drop a leading "exit=..." word from the alert arguments
                    if ($alert_args =~ /^exit=/) {
                        $alert_args =~ s/^exit=\S+ \s+//x;
                    }

                    call_alert (
                        group   => $group,
                        service => $service,
                        output  => "test\ntest detail\n",
                        retval  => $retval,
                        flags   => $f | $FL_TEST,
                        alert   => $alert,
                        args    => $alert_args,
                    );
                }

                sock_write ($fh, "220 test alert completed\n");
            }

        #
        # test config file
        #
        } elsif ($tcmd eq "config") {
            if ((my $err = read_cf ($CF{"CF"}, 0)) ne "") {
                sock_write ($fh, $err);
                sock_write ($fh, "\n520 test config completed, errors found in config file\n");
            } else {
                sock_write ($fh, "220 test config completed OK, no errors found\n");
            }

        } else {
            sock_write ($fh, "520 test error\n");
        }

    #
    # version
    #
    } elsif ($cmd eq "version") {
        sock_write ($fh, "version " . int ($PROT_VERSION) . "\n");
        sock_write ($fh, "220 version completed\n");

    #
    # load state
    #
    } elsif ($cmd eq "loadstate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        foreach (split (/\s+/, $args)) {
            load_state ($_);
        }
        sock_write ($fh, "220 loadstate completed\n");

    #
    # save state
    #
    } elsif ($cmd eq "savestate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /\S/) {
            foreach (split (/\s+/, $args)) {
                save_state ($_);
            }
            sock_write ($fh, "220 savestate completed\n");
        } else {
            sock_write ($fh, "520 savestate error, arguments required\n");
        }

    #
    # term
    #
    } elsif ($cmd eq "term" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        sock_write ($fh, "220 terminating server\n");
        client_close ($cl, "terminated by user command");
        syslog ("info", "terminating by user command");
        exit;

    #
    # stop testing
    #
    } elsif ($cmd eq "stop" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        $STOPPED = 1;
        $STOPPED_TIME = time;
        sock_write ($fh, "220 stop completed\n");

    #
    # start testing
    #
    } elsif ($cmd eq "start" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        $STOPPED = 0;
        $STOPPED_TIME = 0;
        sock_write ($fh, "220 start completed\n");

    #
    # select (one argument) or clear (no argument) the client's view
    #
    } elsif ($cmd eq "setview") {
        my @args = split /\s+/, $args;

        if (@args > 1) {
            sock_write ($fh, "500 Unknown setview command\n");
        } elsif (@args == 1) {
            if (defined ($views{$args[0]})) {
                $clients{$cl}->{"view"} = $args[0];
                sock_write ($fh, "selecting view $args[0]\n");
                sock_write ($fh, "220 setview completed\n");
            } else {
                sock_write ($fh, "504 unknown view $args[0]\n");
            }
        } else {
            delete $clients{$cl}->{"view"};
            sock_write ($fh, "no view selected -- all groups will be displayed\n");
            sock_write ($fh, "220 setview completed\n");
        }

    #
    # report the client's current view
    #
    } elsif ($cmd eq "getview") {
        if ($clients{$cl}->{"view"}) {
            sock_write ($fh, "view " . $clients{$cl}->{"view"} . " selected\n");
        } else {
            sock_write ($fh, "no view selected -- all groups will be displayed\n");
        }
        sock_write ($fh, "220 getview completed\n");

    #
    # set
    #
    } elsif ($cmd eq "set" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /^maxkeep\s+(\d+)/) {
            $CF{"MAX_KEEP"} = $1;
            sock_write ($fh, "220 set completed\n");
        } else {
            ($group, $service, $var, $value) = split (/\s+/, $args, 4);

            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "520 $group,$service not defined\n");

            } elsif ($var eq "opstatus") {
                if (!defined ($OPSTAT{$value})) {
                    sock_write ($fh, "520 undefined opstatus\n");
                } else {
                    set_op_status ($group, $service,
                        un_esc_str ((parse_line ('\s+', 0, $value))[0]));
                    sock_write ($fh, "220 set completed\n");
                }

            } else {
                $value = un_esc_str ((parse_line ('\s+', 0, $value))[0]);
                $watch{$group}->{$service}->{$var} = $value;
                sock_write ($fh, "$group $service $var='$value'\n");
                sock_write ($fh, "220 set completed\n");
            }
        }

    #
    # get
    #
    } elsif ($cmd eq "get" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /^maxkeep\s*$/) {
            sock_write ($fh, "maxkeep = $CF{MAX_KEEP}\n");
            sock_write ($fh, "220 get completed\n");
        } else {
            ($group, $service, $var) = split (/\s+/, $args, 3);

            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "520 $group,$service not defined\n");
            } else {
                sock_write ($fh, "$group $service $var='" .
                    esc_str ($watch{$group}->{$service}->{$var}, 1) . "'\n");
                sock_write ($fh, "220 get completed\n");
            }
        }

    #
    # list
    #
    } elsif ($cmd eq "list" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        @argsList = split (/\s+/, $args);
        my ($lcmd, $largs) = split (/\s+/, $args, 2);

        # the view does not change while a list command is served
        my $view = $clients{$cl}->{"view"};

        #
        # list service descriptions
        #
        if ($lcmd eq "descriptions") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match ($view, $group, $service)) {
                        sock_write ($fh, "$group $service " .
                            esc_str ($watch{$group}->{$service}->{"description"}, 1) .
                            "\n");
                    }
                }
            }
            sock_write ($fh, "220 list descriptions completed\n");

        #
        # list group members
        #
        } elsif ($lcmd eq "group") {
            if ($groups{$largs}) {
                sock_write ($fh, "hostgroup $largs @{$groups{$largs}}\n");
                sock_write ($fh, "220 list group completed\n");
            } else {
                sock_write ($fh, "520 list group error, undefined group\n");
            }

        #
        # list status of all services
        #
        } elsif ($lcmd eq "opstatus") {
            if (!defined $largs || $largs eq "") {
                foreach $group (keys %watch) {
                    foreach $service (keys %{$watch{$group}}) {
                        if (view_match ($view, $group, $service)) {
                            client_write_opstatus ($fh, $group, $service);
                        }
                    }
                }
                sock_write ($fh, "220 list opstatus completed\n");

            } else {
                # arguments are "group" or "group,service" words; nothing
                # is written unless every named service exists
                my $err = 0;
                my @g = ();
                my ($group, $service);

                foreach my $gs (split (/\s+/, $largs)) {
                    ($group, $service) = split (/,/, $gs);

                    # stop at the first unknown service so that the
                    # error reply names the pair that failed
                    if ($service ne "" && !defined $watch{$group}->{$service}) {
                        $err = 1;
                        last;
                    }

                    push (@g, [$group, $service]);
                }

                if (!$err) {
                    foreach my $gs (@g) {
                        if ($gs->[1] ne "") {
                            client_write_opstatus ($fh, $gs->[0], $gs->[1]);
                        } else {
                            foreach $service (keys %{$watch{$gs->[0]}}) {
                                client_write_opstatus ($fh, $gs->[0], $service);
                            }
                        }
                    }
                    sock_write ($fh, "220 list opstatus completed\n");
                } else {
                    sock_write ($fh, "520 $group,$service does not exist\n");
                }
            }

        #
        # list disabled hosts and services
        #
        } elsif ($lcmd eq "disabled") {
            foreach $group (keys %groups) {
                if (view_match ($view, $group, undef)) {
                    # group members with a leading "*" are reported here
                    @l = grep (/^\*/, @{$groups{$group}});
                    if (@l) {
                        s/^\*// for @l;
                        sock_write ($fh, "group $group: @l\n");
                    }
                }
            }

            foreach $group (keys %watch) {
                if (view_match ($view, $group, undef)) {
                    if (exists $watch_disabled{$group} && $watch_disabled{$group} == 1) {
                        sock_write ($fh, "watch $group\n");
                    }
                }

                foreach $service (keys %{$watch{$group}}) {
                    if (view_match ($view, $group, $service)) {
                        if (defined $watch{$group}->{$service}->{'disable'}
                            && $watch{$group}->{$service}->{'disable'} == 1) {
                            sock_write ($fh, "watch $group service " .
                                "$service\n");
                        }
                    }
                }
            }
            sock_write ($fh, "220 list disabled completed\n");

        #
        # list last alert history
        #
        } elsif ($lcmd eq "alerthist") {
            foreach my $entry (@last_alerts) {
                sock_write ($fh, esc_str ($entry) . "\n");
            }
            sock_write ($fh, "220 list alerthist completed\n");

        #
        # list time of last failures for each service
        #
        } elsif ($lcmd eq "failures") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match ($view, $group, $service)) {
                        my $sref = \%{$watch{$group}->{$service}};
                        client_write_opstatus ($fh, $group, $service)
                            if ($FAILURE{$sref->{"_op_status"}});
                    }
                }
            }
            sock_write ($fh, "220 list failures completed\n");

        #
        # list the failure history
        #
        } elsif ($lcmd eq "failurehist") {
            foreach my $entry (@last_failures) {
                sock_write ($fh, esc_str ($entry) . "\n");
            }
            sock_write ($fh, "220 list failurehist completed\n");

        #
        # list the time of last successes for each service
        #
        } elsif ($lcmd eq "successes") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match ($view, $group, $service)) {
                        my $sref = \%{$watch{$group}->{$service}};
                        client_write_opstatus ($fh, $group, $service)
                            if ($SUCCESS{$sref->{"_op_status"}});
                    }
                }
            }
            sock_write ($fh, "220 list successes completed\n");

        #
        # list warnings
        #
        } elsif ($lcmd eq "warnings") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match ($view, $group, $service)) {
                        my $sref = \%{$watch{$group}->{$service}};
                        client_write_opstatus ($fh, $group, $service)
                            if ($WARNING{$sref->{"_op_status"}});
                    }
                }
            }
            sock_write ($fh, "220 list warnings completed\n");

        #
        # list process IDs
        #
        } elsif ($lcmd eq "pids") {
            sock_write ($fh, "server $$\n");
            foreach $value (keys %runningpid) {
                ($group, $service) = split (/\//, $runningpid{$value});
                sock_write ($fh, "$group $service $value\n");
            }
            sock_write ($fh, "220 list pids completed\n");

        #
        # list watch groups and services
        #
        } elsif ($lcmd eq "watch") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match ($view, $group, $service)) {
                        if (!defined $watch{$group}->{$service}) {
                            sock_write ($fh, "$group (undefined service)\n");
                        } else {
                            sock_write ($fh, "$group $service\n");
                        }
                    }
                }
            }
            sock_write ($fh, "220 list watch completed\n");

        #
        # list server state
        #
        } elsif ($lcmd eq "state") {
            if ($STOPPED) {
                sock_write ($fh, "scheduler stopped since $STOPPED_TIME\n");
            } else {
                sock_write ($fh, "scheduler running\n");
            }
            sock_write ($fh, "220 list state completed\n");

        #
        # list aliases
        #
        } elsif ($lcmd eq "aliases") {
            my (@listAliasesRequest) = @argsList;

            # drop the "aliases" word itself
            shift (@listAliasesRequest);

            # if no alias request, all alias are responded
            unless (@listAliasesRequest) {
                @listAliasesRequest = keys (%alias);
            }

            foreach my $alias (@listAliasesRequest) {
                sock_write ($fh, "alias $alias\n");
                foreach $value (@{$alias{$alias}}) {
                    sock_write ($fh, "$value\n");
                }
                sock_write ($fh, "\n");
            }
            sock_write ($fh, "220 list aliases completed\n");

        #
        # list aliasgroups
        #
        } elsif ($lcmd eq "aliasgroups") {
            my @listAliasesRequest = keys (%alias);

            sock_write ($fh, "@listAliasesRequest\n")
                unless (@listAliasesRequest == 0);
            sock_write ($fh, "220 list aliasgroups completed\n");

        #
        # list deps
        #
        } elsif ($lcmd eq "deps") {
            foreach my $g (keys %watch) {
                foreach my $s (keys %{$watch{$g}}) {
                    # match the view against the pair being listed
                    if (view_match ($view, $g, $s)) {
                        my $sref = \%{$watch{$g}->{$s}};

                        if ($sref->{"depend"} ne "") {
                            sock_write ($fh, "exp $g $s '" .
                                esc_str ($sref->{"depend"}, 1) . "'\n");
                        } else {
                            sock_write ($fh, "exp $g $s 'NONE'\n");
                        }

                        # every "word:word" token found in the expression
                        my @u =
                            ($sref->{"depend"} =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g);
                        if (@u) {
                            sock_write ($fh, "cmp $g $s @u\n");
                        } else {
                            sock_write ($fh, "cmp $g $s NONE\n");
                        }
                    }
                }
            }

            sock_write ($fh, "220 list deps completed\n");

        #
        # downtime log
        #
        } elsif ($lcmd eq "dtlog") {
            if ($CF{"DTLOGGING"}) {
                if (!open (my $dtlog, "<", $CF{"DTLOGFILE"})) {
                    sock_write ($fh, "520 list dtlog error, cannot open dtlog\n");

                } else {
                    # comment lines and blank lines are not sent
                    while (<$dtlog>) {
                        sock_write ($fh, $_) if (!/^#/ && !/^\s*$/);
                    }

                    close ($dtlog);

                    sock_write ($fh, "220 list dtlog completed\n");
                }

            } else {
                sock_write ($fh, "520 list dtlog error, dtlogging is not turned on\n");
            }

        #
        # list available views
        #
        } elsif ($lcmd eq "views") {
            sock_write ($fh, "views " . join (' ', sort (keys %views)) . "\n");
            sock_write ($fh, "220 list views completed\n");

        # unknown list command
        } else {
            sock_write ($fh, "520 unknown list command\n");
        }

    #
    # acknowledge a failure
    #
    } elsif ($cmd eq "ack" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($group, $service, $comment) = split (/\s+/, $args, 3);

        if (!defined ($watch{$group})) {
            sock_write ($fh, "520 unknown group\n");

        } elsif (!defined $watch{$group}->{$service}) {
            sock_write ($fh, "520 unknown service\n");

        } else {
            my $sref = \%{$watch{$group}->{$service}};

            if ($sref->{"_op_status"} == $STAT_OK ||
                $sref->{"_op_status"} == $STAT_UNTESTED) {
                sock_write ($fh, "520 service is in a non-failure state\n");

            } else {
                $sref->{"_ack"} = time;
                $sref->{"_ack_comment"} = $clients{$cl}->{"user"} . ": " .
                    un_esc_str ((parse_line ('\s+', 0, $comment))[0]);
                sock_write ($fh, "220 ack completed\n");
                do_alert ($group, $service, $sref->{"_ack_comment"}, undef, $FL_ACKALERT);
            }
        }

    #
    # disable watch, service or host
    #
    } elsif ($cmd eq "disable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($dcmd, $dargs) = split (/\s+/, $args, 2);

        #
        # disable watch
        #
        if ($dcmd eq "watch") {
            if (!defined (disen_watch ($dargs, 0))) {
                sock_write ($fh, "520 disable error, unknown watch \"$dargs\"\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} disable watch $dargs") if ($CF{MONREMOTE});
                sock_write ($fh, "220 disable watch completed\n");
            }

        #
        # disable service
        #
        } elsif ($dcmd eq "service") {
            ($group, $service) = split (/\s+/, $dargs, 2);

            if (!defined (disen_service ($group, $service, 0))) {
                sock_write ($fh, "520 disable error, unknown service\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} disable service $group $service") if ($CF{MONREMOTE});
                sock_write ($fh, "220 disable service completed\n");
                do_alert ($group, $service, $clients{$cl}->{"user"}, undef, $FL_DISABLEALERT);
            }

        #
        # disable host
        #
        } elsif ($dcmd eq "host") {
            my @hosts = split (/\s+/, $dargs);

            # nothing is disabled unless every named host exists
            my @notfound = grep { !host_exists ($_) } @hosts;

            if (@notfound) {
                sock_write ($fh, "520 disable host failed, host(s) @notfound do not exist\n");
            } else {
                foreach my $h (@hosts) {
                    #
                    # disable a watch if there is a group with this host
                    # as its only member. this prevents warning messages
                    # about monitors not being run on empty host groups
                    #
                    foreach my $g (host_singleton_group ($h)) {
                        disen_watch ($g, 0);
                        mysystem("$CF{MONREMOTE} disable watch $g") if ($CF{MONREMOTE});
                    }

                    disen_host ($h, 0);
                    $stchanged++;
                    mysystem("$CF{MONREMOTE} disable host $h") if ($CF{MONREMOTE});
                }
                sock_write ($fh, "220 disable host completed\n");
            }

        } else {
            sock_write ($fh, "520 command could not be executed\n");
        }

    #
    # enable watch, service or host
    #
    } elsif ($cmd eq "enable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($ecmd, $eargs) = split (/\s+/, $args, 2);

        #
        # enable watch
        #
        if ($ecmd eq "watch") {
            if (!defined (disen_watch ($eargs, 1))) {
                sock_write ($fh, "520 enable error, unknown watch\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} enable watch $eargs") if ($CF{MONREMOTE});
                sock_write ($fh, "220 enable watch completed\n");
            }

        #
        # enable service
        #
        } elsif ($ecmd eq "service") {
            ($group, $service) = split (/\s+/, $eargs, 2);

            if (!defined (disen_service ($group, $service, 1))) {
                sock_write ($fh, "520 enable error, unknown group\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} enable service $group $service") if ($CF{MONREMOTE});
                sock_write ($fh, "220 enable completed\n");
            }

        #
        # enable host
        #
        } elsif ($ecmd eq "host") {
            foreach my $h (split (/\s+/, $eargs)) {
                foreach my $g (host_singleton_group ($h)) {
                    disen_watch ($g, 1);
                    mysystem("$CF{MONREMOTE} enable watch $g") if ($CF{MONREMOTE});
                }

                disen_host ($h, 1);
                # NOTE(review): unlike "disable host", $h is not checked
                # with host_exists() before it is interpolated into the
                # command string. If mysystem() runs it through a shell,
                # an authorized client can inject shell syntax -- confirm
                # against mysystem() and validate $h if so.
                mysystem("$CF{MONREMOTE} enable host $h") if ($CF{MONREMOTE});
                $stchanged++;
            }
            sock_write ($fh, "220 enable completed\n");

        } else {
            sock_write ($fh, "520 command could not be executed\n");
        }

    #
    # server time
    #
    } elsif ($cmd eq "servertime" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        sock_write ($fh, join ("", time, " ", scalar (localtime), "\n"));
        sock_write ($fh, "220 servertime completed\n");

    #
    # check auth
    #
    } elsif ($cmd eq "checkauth") {
        # the first word of the arguments is the command to be checked
        my ($checkcmd) = split (' ', $args);

        # Note that we call check_auth without syslogging here.
        if (check_auth ($clients{$cl}->{"user"}, $checkcmd, 1)) {
            sock_write ($fh, "220 command authorized\n");
        } else {
            sock_write ($fh, "520 command could not be executed\n");
        }

    } else {
        sock_write ($fh, "520 command could not be executed, unknown command\n");
    }

    save_state ("disabled") if ($stchanged);
    syslog ('info', "%s", "finished client command \"$l\"")
        if ($l !~ /^\s*login/i);
}
|
3007
|
|
3008
|
|
3009
|
#
# Write the operational status of one service to a client
#
# Arguments:
#   $fh      - handle passed on to sock_write()
#   $group   - watch group name
#   $service - service name within that group
#
# The status is sent as a single line of space-separated key=value
# pairs, built from the service's entry in %watch.
#
sub client_write_opstatus {
    my $fh = shift;
    my ($group, $service) = @_;

    my $sref = \%{$watch{$group}->{$service}};

    # undefined values are reported as empty strings
    my $or_empty = sub { defined $_[0] ? $_[0] : "" };

    my $summary       = esc_str ($sref->{"_last_summary"}, 1);
    my $detail        = esc_str ($sref->{"_last_detail"}, 1);
    my $depend        = esc_str ($sref->{"depend"}, 1);
    my $hostdepend    = esc_str ($sref->{"hostdepend"}, 1);
    my $monitordepend = esc_str ($sref->{"monitordepend"}, 1);
    my $alertdepend   = esc_str ($sref->{"alertdepend"}, 1);
    my $monitor       = esc_str ($sref->{"monitor"}, 1);

    # the ack comment is only reported while an ack is in effect
    my $comment = '';
    if ($sref->{"_ack"} != 0) {
        $comment = esc_str ($sref->{"_ack_comment"}, 1);
    }

    # total the alerts sent over all periods (skipping periods that have
    # "alerts_dont_count" defined) and find the most recent alert time
    my $alerts_sent = 0;
    my $last_alert = 0;
    foreach my $period (keys %{$sref->{"periods"}}) {
        my $pref = $sref->{"periods"}->{$period};

        $alerts_sent += $pref->{"_alert_sent"}
            if (!defined ($pref->{"alerts_dont_count"}));

        $last_alert = $pref->{"_last_alert"}
            if (defined $pref->{"_last_alert"} && $pref->{"_last_alert"} > $last_alert);
    }

    # The line is assembled by concatenation rather than by interpolating
    # values into a sprintf format: several of these values come from
    # clients or the config file (ack comment, depend expressions, the
    # monitor command line) and a "%" in any of them would otherwise be
    # interpreted as a conversion.
    my $buf = join (" ",
        "group=$group",
        "service=$service",
        "opstatus=" . $or_empty->($sref->{"_op_status"}),
        "last_opstatus=" . $or_empty->($sref->{"_last_op_status"}),
        "exitval=" . $or_empty->($sref->{"_exitval"}),
        "timer=" . $or_empty->($sref->{"_timer"}),
        "last_success=" . $or_empty->($sref->{"_last_success"}),
        "last_trap=" . $or_empty->($sref->{"_last_trap"}),
        "last_traphost=" . $or_empty->($sref->{"_last_traphost"}),
        "last_check=" . $or_empty->($sref->{"_last_check"}),
        "ack=" . $or_empty->($sref->{"_ack"}),
        "ackcomment=" . $or_empty->($comment),
        "alerts_sent=$alerts_sent",
        "depstatus=" . (defined $sref->{"_depend_status"}
                            ? int ($sref->{"_depend_status"}) : ""),
        "depend=" . $or_empty->($depend),
        "hostdepend=" . $or_empty->($hostdepend),
        "monitordepend=" . $or_empty->($monitordepend),
        "alertdepend=" . $or_empty->($alertdepend),
        "monitor=" . $or_empty->($monitor),
        "last_summary=" . $or_empty->($summary),
        "last_detail=" . $or_empty->($detail),
    );

    #
    # the remaining pairs are only present when they have a value
    #
    $buf .= " last_failure=$sref->{_last_failure}"
        if ($sref->{"_last_failure"});

    if ($sref->{"interval"}) {
        $buf .= " interval=$sref->{interval}" .
            " monitor_duration=$sref->{_monitor_duration}" .
            " monitor_running=$sref->{_monitor_running}";
    }

    $buf .= " exclude_period=" . esc_str ($sref->{"exclude_period"})
        if ($sref->{"exclude_period"} ne "");

    $buf .= " exclude_hosts=" .
        esc_str (join (" ", keys %{$sref->{"exclude_hosts"}}))
        if (keys %{$sref->{"exclude_hosts"}});

    $buf .= " randskew=$sref->{randskew}"
        if ($sref->{"randskew"});

    $buf .= " last_alert=$last_alert"
        if ($last_alert);

    if ($sref->{"_first_failure"}) {
        my $t = time - $sref->{"_first_failure"};

        $buf .= " first_failure=$sref->{_first_failure}" .
            " failure_duration=$t";
    }

#    if ($sref->{"_first_success"})
#    {
#        my $t = time - $sref->{"_first_success"};
#
#        $buf .= " first_success=$sref->{_first_success}" .
#            " success_duration=$t";
#    }

    $buf .= "\n";

    sock_write ($fh, $buf);
}
|
3085
|
|
3086
|
|
3087
|
#
# show usage
#
# Prints the command line synopsis and the description of every option
# to the currently selected output handle, followed by the bug report
# address ($AUTHOR) and the revision string ($RCSID).
#
sub usage {
    print <<"EOF";
usage: mon [-a dir] [-A file] [-b dir] [-B dir] [-c config] [-d]
       [-D dir] [-f] [-h] [-i secs] [-k num] [-l [type]] [-L dir]
       [-M [path]] [-m num] [-O facility] [-p num] [-P file] [-r num]
       [-s dir] [-S] [-t port]
       mon -v

  -a dir        alert script dir
  -A file       authorization file
  -b dir        base directory for alerts and monitors (basedir)
  -B dir        base directory for configuration files (cfbasedir)
  -c config     config file, defaults to "mon.cf"
  -d            debug
  -D dir        state directory (statedir)
  -f            fork and become a daemon
  -h            this help
  -i secs       sleep interval (seconds), defaults to 1
  -k num        keep history of last num events
  -l [type]     load some types of old state from statedir. type can
                be disabled (default), opstatus or all.
  -L dir        log directory (logdir)
  -M [path]     pre-process config file with m4. if m4 isn't in \$PATH
                specify the path to m4 here
  -m num        throttle at maximum number of monitor processes
  -O facility   syslog facility to use
  -p num        server listens on port num
  -P file       PID file
  -r num        randomize startup schedule
  -s dir        monitor script dir
  -S            start with scheduler stopped
  -t port       trap port
  -v            print version

Report bugs to $AUTHOR
$RCSID
EOF
}
|
3128
|
|
3129
|
|
3130
|
#
# become a daemon
#
# Forks twice, calling setsid() in between; each parent exits with
# status 0 and only the second child returns from this sub. The
# surviving process sets its umask to 022, opens $CF{"MONERRFILE"} for
# appending, points STDOUT and STDERR at that file and reopens STDIN
# from /dev/null. Any failure terminates the process.
#
sub daemon {
    my $pid;

    if ($pid = fork()) {
        # the parent goes away all happy and stuff
        exit (0);
    } elsif (!defined $pid) {
        die "could not fork: $!\n";
    }

    setsid();

    #
    # make it so that we cannot regain a controlling terminal
    #
    if ($pid = fork()) {
        # the parent goes away all happy and stuff
        exit (0);
    } elsif (!defined $pid) {
        syslog ('err', "%s", "could not fork: $!");
        exit 1;
    }

#    chdir ('/');
    umask (022);

    # three-argument open: the configured path is taken literally and
    # can never be read as part of the open mode
    if (!open (N, "+>>", $CF{"MONERRFILE"})) {
        syslog ("err", "%s", "could not open error output file $CF{'MONERRFILE'}: $!");
        exit (1);
    }

    # make the error file unbuffered, then select STDOUT again
    select (N);
    $| = 1;
    select (STDOUT);

    if (!open (STDIN, "<", "/dev/null")) {
        syslog ("err", "%s", "could not open STDIN from /dev/null: $!");
        exit (1);
    }

    print N "Mon starting at " . localtime (time) . "\n";

    # duplicate the error file onto STDOUT and STDERR
    if (!open (STDOUT, ">&N") ||
        !open (STDERR, ">&N")) {
        syslog ("err", "%s", "could not redirect: $!");
        exit (1);
    }

    syslog ('info', "running as daemon");
}
|
3183
|
|
3184
|
|
3185
|
#
# debug
#
# Arguments:
#   $level - verbosity level of this message
#   @l     - the pieces of the message
#
# Nothing is emitted unless $opt{"d"} is defined and $level does not
# exceed it. The message is printed on STDERR when $opt{"d"} is true and
# $opt{"f"} is not set (presumably the -f "become a daemon" switch --
# confirm against option parsing); otherwise it goes to syslog at
# "debug" priority.
#
sub debug {
    my ($level, @l) = @_;

    return if (!defined $opt{"d"} || $level > $opt{"d"});

    if ($opt{"d"} && !$opt{"f"}) {
        print STDERR @l;
    } else {
        # the text goes in as an argument to "%s": debug messages can
        # contain arbitrary text, which must not be read as a format
        syslog ('debug', "%s", join ('', @l));
    }
}
|
3199
|
|
3200
|
|
3201
|
#
|
3202
|
# die_die
|
3203
|
#
|
3204
|
sub die_die {
    #
    # Terminate the program with a fatal message.
    #
    #   $level   syslog priority, also shown in the die() text
    #   $msg     description of the failure
    #
    # When -d is in effect the message is raised with die(); otherwise it
    # is logged to syslog, the log is closed and the process exits 1.
    #
    my ($level, $msg) = @_;

    if ($opt{"d"}) {
        die "[$level] $msg\n";
    }

    syslog ($level, "fatal, $msg");
    closelog();
    exit (1);
}
|
3213
|
|
3214
|
|
3215
|
#
|
3216
|
# handle cleanup of exited processes
|
3217
|
# trigger alerts on failures (or send no alert if disabled)
|
3218
|
# do some accounting
|
3219
|
#
|
3220
|
sub proc_cleanup {
    #
    # Reap every monitor process that has exited (without blocking) and,
    # for each one that we started:
    #   - drain whatever output is still readable from its file handle
    #   - record how long the monitor ran and mark it as not running
    #   - hand the exit status and collected output to process_event
    #   - reset the service timer and drop the process bookkeeping
    #
    my $now = time;
    my $chunk;

    return unless (keys %running);

    while (1) {
        my $pid = waitpid (-1, &WNOHANG);
        last unless ($pid > 0);

        # not one of our monitors
        next unless (exists $runningpid{$pid});

        my $key = $runningpid{$pid};            # "group/service"
        my ($group, $service) = split (/\//, $key);
        my $sref = \%{$watch{$group}->{$service}};

        #
        # suck in any extra data
        #
        my $fh = $fhandles{$key};
        while (sysread ($fh, $chunk, 8192)) {
            $ibufs{$key} .= $chunk;
        }

        debug (1, "PID $pid ($key) exited with [" . int ($?>>8) . "]\n");

        $sref->{"_monitor_duration"} = $now - $sref->{"_last_check"};
        $sref->{"_monitor_running"} = 0;

        process_event ("m", $group, $service, int ($?>>8), $ibufs{$key});

        reset_timer ($group, $service);

        remove_proc ($pid);
    }
}
|
3254
|
|
3255
|
|
3256
|
#
|
3257
|
# handle the event where a monitor exits or a trap is received
|
3258
|
#
|
3259
|
# $type is "m" for monitor, "t" for trap
|
3260
|
#
|
3261
|
sub process_event {
    #
    # Record the outcome of a monitor run or trap for $group/$service and
    # send whatever alerts that outcome calls for.
    #
    #   $type      "m" for a monitor exit, "t" for a trap,
    #              "T" for a trap timeout
    #   $group     watch group name
    #   $service   service name within that group
    #   $exitval   exit status; any true value is treated as a failure
    #   $output    raw output; the first line is the summary, the rest
    #              is the detail
    #
    # Updates the service's bookkeeping fields in %watch (the "_..." keys)
    # and appends failures to @last_failures.
    #
    my ($type, $group, $service, $exitval, $output) = @_;

    debug (1, "process_event type=$type group=$group service=$service exitval=$exitval output=[$output]\n");

    my $sref = \%{$watch{$group}->{$service}};
    my $tmnow = time;

    my ($summary, $detail) = split("\n", $output, 2);

    $sref->{"_exitval"} = $exitval;

    if ($sref->{"depend"} ne "" &&
        $sref->{"dep_behavior"} eq "a")
    {
        dep_ok ($sref, 'a');
    }

    #
    # error exit value
    #
    if ($exitval)
    {
        #
        # accounting
        #
        $sref->{"_failure_count"}++;
        $sref->{"_consec_failures"}++;
        $sref->{"_last_failure"} = $tmnow;

        # only a transition into failure starts a new failure period
        if ($sref->{"_op_status"} == $STAT_OK ||
            $sref->{"_op_status"} == $STAT_UNKNOWN ||
            $sref->{"_op_status"} == $STAT_UNTESTED)
        {
            $sref->{"_first_failure"} = $tmnow;
        }
        set_op_status ($group, $service, $STAT_FAIL);

        $summary = "(NO SUMMARY)" if ($summary =~ /^\s*$/m);

        # stored before do_alert is called, in the same order as always
        $sref->{"_last_summary"} = $summary;
        $sref->{"_last_detail"} = $detail;

        shift @last_failures if (@last_failures > $CF{"MAX_KEEP"});

        # NOTE(review): $tm is not declared in this sub (the local
        # timestamp is $tmnow); confirm that the file-level $tm holds
        # the intended time here.
        push @last_failures, "$group $service" .
            " $tm $summary";
        syslog ('crit', "failure for $last_failures[-1]");

        #
        # send an alert if necessary
        #
        if ($type eq "m")
        {
            do_alert ($group, $service, $output, $exitval, $FL_MONITOR);

            #
            # change interval if needed
            #
            if (defined ($sref->{"failure_interval"}) &&
                !defined $sref->{"_old_interval"})
            {
                $sref->{"_old_interval"} = $sref->{"interval"};
                $sref->{"interval"} = $sref->{"failure_interval"};
                $sref->{"_next_check"} = 0;
            }
        }

        elsif ($type eq "t")
        {
            do_alert ($group, $service, $output, $exitval, $FL_TRAP);
        }

        elsif ($type eq "T")
        {
            do_alert ($group, $service, $output, $exitval, $FL_TRAPTIMEOUT);
        }

        $sref->{"_failure_output"} = $output;
    }

    #
    # success exit value
    #
    else
    {
        if ($CF{"DTLOGGING"} && defined ($sref->{"_op_status"}) &&
            $sref->{"_op_status"} == $STAT_FAIL)
        {
            write_dtlog ($sref, $group, $service);
        }

        my $old_status = $sref->{"_op_status"};
        set_op_status ($group, $service, $STAT_OK);

        if ($type eq "t")
        {
            $sref->{"_last_uptrap"} = $tmnow;
        }

        #
        # if this service has just come back up and
        # we are paying attention to this event,
        # let someone know
        #
        if (($sref->{"redistribute"} ne '') ||
            ((defined ($sref->{"_op_status"})) &&
             ($old_status == $STAT_FAIL) &&
             (defined($sref->{"_upalert"})) &&
             (!defined($sref->{"upalertafter"})
              || (($tmnow - $sref->{"_first_failure"}) >= $sref->{"upalertafter"}))))
        {
            # Save the last failing monitor's output for posterity
            $sref->{"_upalertoutput"}= $sref->{"_last_output"};
            do_alert ($group, $service, $sref->{"_upalertoutput"}, 0, $FL_UPALERT);
        }

        #
        # send also when no upalertafter set
        # cabo: Modified to always send
        # (the previous condition only tested $old_status == $STAT_FAIL)
        #
        elsif (defined($sref->{"_upalert"}) && ($old_status == $STAT_FAIL || $old_status == $STAT_UNTESTED))
        {
            do_alert ($group, $service, $sref->{"_upalertoutput"}, 0, $FL_UPALERT);
        }

        $sref->{"_ack"} = 0;
        $sref->{"_ack_comment"} = '';
        $sref->{"_first_failure"} = 0;
        $sref->{"_last_failure"} = 0;
        $sref->{"_consec_failures"} = 0;
        $sref->{"_failure_output"} = "";
        $sref->{"_last_summary"} = $summary;
        $sref->{"_last_detail"} = $detail;

        #
        # reset the alertevery timer
        #
        foreach my $period (keys %{$sref->{"periods"}})
        {
            my $pref = $sref->{"periods"}->{$period};

            #
            # "alertevery strict" should not reset _last_alert
            #
            if (!$pref->{"_alertevery_strict"})
            {
                $pref->{"_last_alert"} = 0;
            }

            $pref->{"_1stfailtime"} = 0;
            $pref->{"_alert_sent"} = 0;
        }

        #
        # change interval back to original
        #
        # Tested with defined() rather than the former "!= undef", which
        # was a numeric comparison against 0 and mirrors the !defined
        # test used when the interval is switched in the failure branch.
        #
        if (defined ($sref->{"failure_interval"}) &&
            defined ($sref->{"_old_interval"}))
        {
            $sref->{"interval"} = $sref->{"_old_interval"};
            $sref->{"_old_interval"} = undef;
            $sref->{"_next_check"} = 0;
        }

        $sref->{"_last_success"} = $tmnow;
    }

    #
    # save the output
    #
    $sref->{"_last_output"} = $output;
    $sref->{"_last_summary"} = $summary;
    $sref->{"_last_detail"} = $detail;
}
|
3431
|
|
3432
|
|
3433
|
#
|
3434
|
# collect output from running processes
|
3435
|
#
|
3436
|
sub collect_output {
    #
    # Poll (zero timeout) the file handles of the running monitors and
    # append whatever can be read right now to the per-service buffers
    # in %ibufs.  Handles that hit EOF or a read error are removed from
    # the select bit vector $fdset_rbits.
    #
    # errno is sampled immediately after each system call: $! is only
    # meaningful when the call just made reported a failure, and the
    # debug() calls in between may change it.
    #
    my ($buf, $rout);

    return if (!keys %running);

    my $nfound = select ($rout=$fdset_rbits, undef, undef, 0);
    my $select_errno = $! + 0;
    debug (1, "select returned $nfound file handles\n");

    # an interrupted select is not an error; just try again later
    return if ($nfound < 0 && $select_errno == EINTR());

    if ($nfound) {
        #
        # look for the file descriptors that are readable,
        # and try to read as much as possible from them
        #
        foreach my $k (keys %fhandles) {
            my $fh = $fhandles{$k};

            next if (vec ($rout, fileno($fh), 1) != 1);

            my ($z, $read_errno, $read_errstr);

            while (1) {
                # clear errno so that a 0 (EOF) return, which does not
                # set it, cannot be confused with an earlier failure
                $! = 0;
                $z = sysread ($fh, $buf, 8192);
                $read_errno = $! + 0;
                $read_errstr = "$!";

                last if (!$z);

                $ibufs{$k} .= $buf;
                debug (1, "[$buf] from $fh\n");
            }

            #
            # ignore if EAGAIN, since we're nonblocking
            #
            if (!defined($z) && $read_errno == EAGAIN()) {

            #
            # error on this descriptor
            #
            } elsif (!defined($z)) {
                debug (1, "error on $fh: $read_errstr\n");
                syslog ('err', "error on $fh: $read_errstr");
                vec($fdset_rbits, fileno($fh), 1) = 0;

            #
            # zero-length read that the system flagged as EAGAIN
            #
            } elsif ($z == 0 && $read_errno == EAGAIN()) {
                debug (1, "EAGAIN on $fh\n");

            #
            # if EOF encountered, stop trying to
            # get input from this file descriptor
            #
            } elsif ($z == 0) {
                debug (1, "EOF on $fh\n");
                vec($fdset_rbits, fileno($fh), 1) = 0;
            }
        }
    }
}
|
3488
|
|
3489
|
|
3490
|
|
3491
|
|
3492
|
#
|
3493
|
# handle forking a monitor process, and set up variables
|
3494
|
#
|
3495
|
sub run_monitor {
    #
    # Fork the monitor program configured for $group/$service and set up
    # the bookkeeping needed to collect its output.
    #
    # Returns undef when the monitor program cannot be found and 0 when
    # the fork fails.  In every other case the service's _next_check time
    # is advanced by one interval.
    #
    my ($group, $service) = @_;

    my $key = "$group/$service";
    my $sref = \%{$watch{$group}->{$service}};

    # program name, plus the remainder of the line (leading blanks kept)
    my ($monitor, $monitorargs) = ($sref->{"monitor"} =~ /^(\S+)(\s+(.*))?$/);

    unless (defined $MONITORHASH{$monitor} && -f $MONITORHASH{$monitor}) {
        syslog ('err', "no monitor found while trying to run [$monitor]");
        return undef;
    }

    $monitor = $MONITORHASH{$monitor};
    $monitor .= " " . $monitorargs if ($monitorargs);

    my @args;
    my @ghosts = ();

    #
    # if monitor ends with ";;", do not append groups
    # to command line
    #
    if ($monitor =~ /;;\s*$/) {
        $monitor =~ s/\s*;;\s*$//;
        @args = quotewords ('\s+', 0, $monitor);
        @ghosts = (1);
    }

    else {
        #
        # exclude disabled hosts (those marked with a leading "*")
        #
        @ghosts = grep (!/^\*/, @{$groups{$group}});

        #
        # per-service excludes
        #
        if (keys %{$sref->{"exclude_hosts"}}) {
            my $excluded = $sref->{"exclude_hosts"};
            @ghosts = grep { !$excluded->{$_} } @ghosts;
        }

        #
        # per-host dependencies: drop every host that appears in the
        # dependency summary
        #
        my $dep_hm = defined $sref->{"depend"}
            && $sref->{"depend"} ne ""
            && $sref->{"dep_behavior"} eq 'hm';
        my $host_dep = defined $sref->{"hostdepend"}
            && $sref->{"hostdepend"} ne "";

        if ($dep_hm || $host_dep) {
            my $sum = dep_summary($sref);
            @ghosts = grep {
                my $candidate = $_;
                !grep (/\Q$candidate\E/, @$sum);
            } @ghosts;
        }

        @args = (quotewords ('\s+', 0, $monitor), @ghosts);
    }

    if (@ghosts == 0 && !defined ($sref->{"allow_empty_group"})) {
        syslog ('err', "monitor for $group/$service not called because of no host arguments\n");
        reset_timer ($group, $service);
    }

    else {
        $fhandles{$key} = FileHandle->new;

        # fork with the child's STDOUT connected to our file handle
        my $pid = open ($fhandles{$key}, '-|');

        if (!defined $pid) {
            syslog ('err', "Could not fork: $!");
            delete $fhandles{$key};
            return 0;
        }

        if ($pid == 0) {
            #
            # child: fix up the standard handles, export the service
            # context through the environment and run the monitor
            #
            open(STDERR, '>&STDOUT')
                or syslog ('err', "Could not dup stderr: $!");

            open(STDIN, "</dev/null")
                or syslog ('err', "Could not connect stdin to /dev/null: $!");

            foreach my $name (keys %{$sref->{"ENV"}}) {
                $ENV{$name} = $sref->{"ENV"}->{$name};
            }

            $ENV{"MON_GROUP"} = $group;
            $ENV{"MON_SERVICE"} = $service;

            # exported only when the service field is defined
            my @optional = (
                [ "MON_LAST_SUMMARY",  "_last_summary"  ],
                [ "MON_LAST_OUTPUT",   "_last_output"   ],
                [ "MON_LAST_FAILURE",  "_last_failure"  ],
                [ "MON_FIRST_FAILURE", "_first_failure" ],
                [ "MON_DEPEND_STATUS", "_depend_status" ],
                [ "MON_FIRST_SUCCESS", "_first_success" ],
                [ "MON_LAST_SUCCESS",  "_last_success"  ],
                [ "MON_DESCRIPTION",   "description"    ],
            );
            foreach my $pair (@optional) {
                my ($envname, $field) = @$pair;
                $ENV{$envname} = $sref->{$field} if (defined $sref->{$field});
            }

            $ENV{"MON_STATEDIR"} = $CF{"STATEDIR"};
            $ENV{"MON_LOGDIR"} = $CF{"LOGDIR"};
            $ENV{"MON_CFBASEDIR"} = $CF{"CFBASEDIR"};

            if (!exec @args) {
                syslog ('err', "could not exec '@args': $!");
                exit (1);
            }
        }

        #
        # parent
        #
        $sref->{"_last_check"} = scalar (time);
        $sref->{"_monitor_running"} = 1;

        debug (1, "watching file handle ", fileno ($fhandles{$key}),
            " for $group/$service\n");

        #
        # set nonblocking I/O and setup bit vector for select(2)
        #
        configure_filehandle ($fhandles{$key}) ||
            syslog ("err", "could not configure filehandle for $group/$service: $!");
        vec ($fdset_rbits, fileno($fhandles{$key}), 1) = 1;
        $fdset_ebits |= $fdset_rbits;

        #
        # note that this is running
        #
        $running{$key} = 1;
        $runningpid{$pid} = $key;
        $ibufs{$key} = "";
        $procs++;
    }

    # schedule the next run relative to the previous schedule if there
    # is one, otherwise relative to now
    $sref->{"_next_check"} =
        ($sref->{"_next_check"} ? $sref->{"_next_check"} : time())
        + $sref->{"interval"};
}
|
3656
|
|
3657
|
|
3658
|
#
|
3659
|
# set the countdown timer for this service
|
3660
|
#
|
3661
|
sub reset_timer {
    #
    # Set the countdown timer ("_timer") for $group/$service.
    #
    #   - with a non-zero "randskew": interval plus or minus a random
    #     whole number between 1 and randskew
    #   - else with a "_next_check" time: the seconds remaining until
    #     then, or a full interval if that moment has already passed
    #   - otherwise: a full interval
    #
    my ($group, $service) = @_;

    my $sref = \%{$watch{$group}->{$service}};

    if ($sref->{"randskew"} != 0) {
        # the sign is drawn first, then the size of the skew
        my $sign = (int (rand (2)) == 0) ? -1 : 1;
        my $skew = int (rand ($sref->{"randskew"}) + 1);

        $sref->{"_timer"} = $sref->{"interval"} + $sign * $skew;
    }

    elsif ($sref->{"_next_check"}) {
        my $remaining = $sref->{"_next_check"} - time();

        $sref->{"_timer"} = ($remaining < 0)
            ? $sref->{"interval"}
            : $remaining;
    }

    else {
        $sref->{"_timer"} = $sref->{"interval"};
    }
}
|
3686
|
|
3687
|
|
3688
|
#
|
3689
|
# randomize the delay before each test
|
3690
|
# $opt{"randstart"} is seconds
|
3691
|
#
|
3692
|
sub randomize_startdelay {
    #
    # Give every service of every watch a random initial countdown of
    # 0 .. $CF{"RANDSTART"}-1 seconds so that the first tests do not all
    # start at the same moment.
    #
    for my $group (keys %watch) {
        for my $service (keys %{$watch{$group}}) {
            my $delay = int (rand ($CF{"RANDSTART"}));
            $watch{$group}->{$service}->{"_timer"} = $delay;
        }
    }
}
|
3703
|
|
3704
|
|
3705
|
#
|
3706
|
# return 1 if $val is within $range,
|
3707
|
# where $range = "number" or "number-number"
|
3708
|
#
|
3709
|
sub inRange {
    #
    # Tell whether $val lies within $range.
    #
    #   $val     number to test
    #   $range   either "number" or "number-number" (blanks are allowed
    #            around the dash); both bounds are inclusive
    #
    # Returns 1 on a match and 0 otherwise, including when $range has
    # neither of the two accepted forms.
    #
    my ($val, $range) = @_;

    # single number
    if ($range =~ /^(\d+)$/) {
        return ($val == $1) ? 1 : 0;
    }

    # low-high
    if (my ($low, $high) = ($range =~ /^(\d+)\s*-\s*(\d+)$/)) {
        return 1 if ($val >= $low && $val <= $high);
    }

    return 0;
}
|
3724
|
|
3725
|
|
3726
|
#
|
3727
|
# disable ($cmd==0) or enable a watch
|
3728
|
#
|
3729
|
sub disen_watch {
    #
    # Disable (false $cmd) or enable (true $cmd) the watch named $w.
    #
    # Returns undef if no such watch is defined, otherwise the value
    # stored in %watch_disabled (1 = disabled, 0 = enabled).
    #
    my ($w, $cmd) = @_;

    return undef unless (defined ($watch{$w}));

    return $watch_disabled{$w} = ($cmd ? 0 : 1);
}
|
3739
|
|
3740
|
|
3741
|
#
|
3742
|
# disable ($cmd==0) or enable a service
|
3743
|
#
|
3744
|
sub disen_service {
    #
    # Disable (false $cmd) or enable (true $cmd) service $s of watch $g.
    #
    # Returns undef if the watch or the service is not defined, otherwise
    # the value stored in the service's "disable" field (1 = disabled,
    # 0 = enabled).
    #
    my ($g, $s, $cmd) = @_;

    return undef
        unless (defined $watch{$g} && defined $watch{$g}->{$s});

    return $watch{$g}->{$s}->{"disable"} = ($cmd ? 0 : 1);
}
|
3756
|
|
3757
|
|
3758
|
#
|
3759
|
# disable ($cmd==0) or enable a host
|
3760
|
#
|
3761
|
sub disen_host {
    #
    # Disable (undefined or zero $cmd) or enable (any other $cmd) host $h
    # in every host group that contains it.  A disabled host is marked by
    # a leading "*" on its entry in the group's member list.
    #
    # Returns 1 if at least one entry was changed, undef otherwise.
    #
    # Entries are compared as plain strings.  The host name used to be
    # interpolated into a regular expression, so a "." in the name
    # matched any character and could (un)mark the wrong host.
    #
    my ($h, $cmd) = @_;

    my $found = undef;
    my $disabling = (!defined $cmd) || $cmd == 0;

    # what an entry looks like before and after the change
    my ($from, $to) = $disabling ? ($h, "*$h") : ("*$h", $h);

    foreach my $g (keys %groups) {
        # $member aliases the array element, so the assignment below
        # updates the group in place
        foreach my $member (@{$groups{$g}}) {
            next if ($member ne $from);
            $member = $to;
            $found = 1;
        }
    }

    $found;
}
|
3784
|
|
3785
|
|
3786
|
sub host_exists {
    #
    # Return 1 if $host is listed (as an enabled entry, i.e. without the
    # leading "*" that marks a disabled host) in any host group, else 0.
    #
    # The name is compared as a plain string; it was previously used as
    # an unescaped regular expression, so "." matched any character.
    #
    my $host = shift;

    foreach my $g (keys %groups) {
        return 1 if (grep { $_ eq $host } @{$groups{$g}});
    }

    return 0;
}
|
3801
|
|
3802
|
|
3803
|
|
3804
|
#
|
3805
|
# given a host, search groups and return an array of group
|
3806
|
# names which have that host as their only member. return
|
3807
|
# an empty array if no group found
|
3808
|
#
|
3809
|
#
|
3810
|
sub host_singleton_group {
    #
    # Return the names of all host groups whose only member is $host,
    # whether that entry is enabled ("host") or disabled ("*host").
    # Returns an empty list if there is no such group.
    #
    # The name is compared as a plain string; it was previously used as
    # an unescaped regular expression, so "." matched any character.
    #
    my $host = shift;

    my @found;

    foreach my $g (keys %groups) {
        my @members = @{$groups{$g}};

        next if (@members != 1);

        push (@found, $g)
            if ($members[0] eq $host || $members[0] eq "*$host");
    }

    return (@found);
}
|
3825
|
|
3826
|
|
3827
|
#
|
3828
|
# save state
|
3829
|
#
|
3830
|
sub save_state {
    #
    # Write the requested kinds of state below $CF{STATEDIR}.
    #
    #   @states   any of "disabled", "opstatus" or "all"
    #
    # "disabled" writes one "disable host|watch|service ..." line per
    # disabled item to the file "disabled".  "opstatus" writes one
    # tab-separated line of name=value pairs per service to the file
    # "opstatus".  "all" writes both files.
    #
    # A file that cannot be opened is reported through syslog and
    # skipped; the other file of an "all" request is still written.
    # (Previously an open failure on "disabled" also skipped "opstatus".)
    #
    my (@states) = @_;

    foreach my $state (@states) {
        if ($state eq "disabled" || $state eq "all") {
            my $path = "$CF{STATEDIR}/disabled";
            my $fh;

            if (!open ($fh, '>', $path)) {
                syslog ("err", "could not write to state file: $!");
            } else {
                # disabled hosts carry a leading "*" in their group
                foreach my $group (keys %groups) {
                    foreach my $member (@{$groups{$group}}) {
                        next if ($member !~ /^\*/);
                        print {$fh} "disable host " . substr ($member, 1) . "\n";
                    }
                }

                foreach my $group (keys %watch) {
                    if (exists $watch_disabled{$group} && $watch_disabled{$group} == 1) {
                        print {$fh} "disable watch $group\n";
                    }

                    foreach my $service (keys %{$watch{$group}}) {
                        my $disable = $watch{$group}->{$service}->{'disable'};

                        if (defined $disable && $disable == 1) {
                            print {$fh} "disable service $group $service\n";
                        }
                    }
                }

                # buffered write errors only show up at close time
                close ($fh)
                    or syslog ("err", "could not finish writing state file $path: $!");
            }
        }

        if ($state eq "opstatus" || $state eq "all") {
            my $path = "$CF{STATEDIR}/opstatus";
            my $fh;

            if (!open ($fh, '>', $path)) {
                syslog ("err", "could not write to opstatus state file: $!");
            } else {
                foreach my $group (keys %watch) {
                    foreach my $service (keys %{$watch{$group}}) {
                        my $sref = $watch{$group}->{$service};
                        my @fields = ("group=$group", "service=$service");

                        # per-service fields, stored under "_<name>"
                        foreach my $var (qw(op_status failure_count alert_count last_success first_success
                                            consec_failures last_failure first_failure last_summary
                                            last_failure_time last_failure_summary last_failure_detail
                                            last_detail ack ack_comment last_trap last_traphost exitval
                                            last_check last_op_status failure_output trap_timer)) {
                            push (@fields, "$var=" . esc_str ($sref->{"_$var"}));
                        }

                        # per-period fields, written as "<period>:<name>"
                        foreach my $periodlabel (keys %{$sref->{periods}}) {
                            foreach my $var (qw(last_alert alert_sent 1stfailtime failcount)) {
                                push (@fields, "$periodlabel:$var="
                                    . esc_str ($sref->{periods}{$periodlabel}{"_$var"}));
                            }
                        }

                        print {$fh} join ("\t", @fields) . "\n";
                    }
                }

                close ($fh)
                    or syslog ("err", "could not finish writing state file $path: $!");
            }
        }
    }
}
|
3890
|
|
3891
|
|
3892
|
#
|
3893
|
# load state
|
3894
|
#
|
3895
|
sub load_state {
    #
    # Read back the state files below $CF{STATEDIR}.
    #
    #   @states   any of "disabled", "opstatus" or "all"
    #
    # "disabled" replays the "disable host|watch|service ..." lines of
    # the file "disabled".  "opstatus" restores the per-service and
    # per-period "_..." fields from the file "opstatus" for services
    # that exist in %watch.  "all" does both.
    #
    # A file that cannot be opened is reported through syslog and
    # skipped; the other file of an "all" request is still loaded.
    # (Previously a missing "disabled" file also skipped "opstatus".)
    #
    my (@states) = @_;

    foreach my $state (@states) {
        if ($state eq "disabled" || $state eq "all") {
            my $fh;

            if (!open ($fh, '<', "$CF{STATEDIR}/disabled")) {
                syslog ("err", "could not read state file: $!");
            } else {
                while (defined (my $l = <$fh>)) {
                    chomp $l;
                    my ($cmd, $what, $args) = split (/\s+/, $l, 3);

                    next if ($cmd ne "disable");

                    if ($what eq "host") {
                        disen_host ($args);
                    } elsif ($what eq "watch") {
                        syslog ("err", "undefined watch reading state file: $l")
                            if (!defined disen_watch ($args));
                    } elsif ($what eq "service") {
                        my ($group, $service) = split (/\s+/, $args, 2);
                        syslog ("err",
                            "undefined group or service reading state file: $l")
                            if (!defined disen_service ($group, $service));
                    }
                }

                syslog ("info", "state '$state' loaded");
                close ($fh);
            }
        }

        if ($state eq "opstatus" || $state eq "all") {
            my $fh;

            if (!open ($fh, '<', "$CF{STATEDIR}/opstatus")) {
                syslog ("err", "could not read state file: $!");
            } else {
                while (defined (my $l = <$fh>)) {
                    chomp $l;

                    #
                    # Split each tab-separated field at its FIRST "=" so
                    # that a value containing "=" cannot leak into the
                    # key, and ignore fields that have no "=" at all
                    # (the former greedy pattern split at the last "="
                    # and reused stale captures for such fields).
                    #
                    my %opstatus;
                    foreach my $field (split (/\t/, $l)) {
                        my ($name, $value) = split (/=/, $field, 2);
                        next if (!defined $value);
                        $opstatus{$name} = $value;
                    }

                    next unless (exists $opstatus{group} && exists $watch{$opstatus{group}}
                        && exists $opstatus{service}
                        && exists $watch{$opstatus{group}}->{$opstatus{service}});

                    my $sref = $watch{$opstatus{group}}->{$opstatus{service}};

                    foreach my $op (keys %opstatus) {
                        next if ($op eq 'group' || $op eq 'service');

                        # "<period>:<name>" is a per-period field
                        if ($op =~ /^(.*):(.*)$/) {
                            my ($periodlabel, $var) = ($1, $2);

                            next unless exists $sref->{periods}{$periodlabel};
                            $sref->{periods}{$periodlabel}{"_$var"} = un_esc_str ($opstatus{$op});
                        } else {
                            $sref->{"_$op"} = un_esc_str ($opstatus{$op});
                        }
                    }
                }

                syslog ("info", "state '$state' loaded");
                close ($fh);
            }
        }
    }
}
|
3956
|
|
3957
|
|
3958
|
#
|
3959
|
# authenticate a login
|
3960
|
#
|
3961
|
sub auth {
    #
    # Authenticate a login.
    #
    #   $type        authentication method: "getpwnam", "shadow",
    #                "userfile", "pam" or "trustlocal"
    #   $user        user name
    #   $plaintext   password as supplied by the client
    #   $host        address of the client (only used by "trustlocal")
    #
    # Returns 1 if the login is accepted and undef otherwise.  The
    # "shadow" method has no implementation and always returns undef.
    #
    # Password checks fail closed: an empty stored hash, or crypt()
    # returning undef (as it may for an unusable salt), is a mismatch.
    #
    my ($type, $user, $plaintext, $host) = @_;

    if ($user eq "" || ($type ne 'trustlocal' && $plaintext eq "")) {
        syslog ('err', "an undef username or password supplied");
        return undef;
    }

    # true only if $plaintext hashes to the stored crypt(3) string
    my $password_matches = sub {
        my ($stored) = @_;

        return 0 if (!defined $stored || $stored eq "");

        my $hashed = crypt ($plaintext, $stored);
        return (defined $hashed && $hashed eq $stored) ? 1 : 0;
    };

    #
    # standard UNIX passwd
    #
    if ($type eq "getpwnam") {
        my $pass = (getpwnam ($user))[1];

        return undef if (!defined $pass);
        return undef if (!$password_matches->($pass));
        return 1;

    #
    # shadow password (not implemented)
    #
    } elsif ($type eq "shadow") {

    #
    # "mon" authentication
    #
    } elsif ($type eq "userfile") {
        my $fh;

        if (!open ($fh, '<', $CF{"USERFILE"})) {
            syslog ('err', "could not open user file '$CF{USERFILE}': $!");
            return undef;
        }

        # the whole file is read; a later entry for a user replaces an
        # earlier one.  A lexical line variable keeps the caller's $_
        # intact.
        my %crypted_for;
        while (defined (my $line = <$fh>)) {
            next if ($line =~ /^\s*#/ || $line =~ /^\s*$/);
            chomp $line;
            my ($name, $crypted) = split (/\s*:\s*/, $line, 2);
            $crypted_for{$name} = $crypted;
        }
        close ($fh);

        return undef if (!defined ($crypted_for{$user}));           # user was not found in userfile
        return undef if (!$password_matches->($crypted_for{$user})); # user gave wrong password
        return 1;

    #
    # PAM authentication
    #
    } elsif ($type eq "pam") {
        # NOTE(review): \&pam_conv_func presumably reads these two
        # package variables - confirm against its definition.
        local $PAM_username = $user;
        local $PAM_password = $plaintext;
        my $pamh;

        if (!ref($pamh = Authen::PAM->new($CF{'PAMSERVICE'}, $PAM_username, \&pam_conv_func))) {
            syslog ('err', "Error code $pamh during PAM init!: $!");
            return undef;
        }

        my $res = $pamh->pam_authenticate;
        return undef if ($res != &Authen::PAM::PAM_SUCCESS);
        return 1;

    } elsif ($type eq "trustlocal") {
        # We're configured to trust all authentications from localhost
        # i.e. cgi scripts are handling authentication themselves
        return undef if ($host ne "127.0.0.1");
        return 1;

    } else {
        syslog ('err', "authentication type '$type' not known");
    }

    return undef;
}
|
4033
|
|
4034
|
|
4035
|
#
|
4036
|
# load the table of who can do which commands
|
4037
|
#
|
4038
|
sub load_auth {
    #
    # (Re)load the table of who can do which commands from
    # $CF{"AUTHFILE"} into %AUTHCMDS, %NOAUTHCMDS and %AUTHTRAPS.
    #
    #   $startup   passed on to err_startup for the file-open and
    #              command-line parse errors
    #
    # The file consists of a "command section" (the default), with lines
    # of the form "command: user[,user...]" where "!user" denies the
    # command to that user, and a "trap section" with lines of the form
    # "host user password" where host is "*", a dotted-quad address or a
    # host name.
    #
    # Returns undef if the file cannot be opened.
    #
    my ($startup) = @_;

    %AUTHCMDS = ();
    %NOAUTHCMDS = ();
    %AUTHTRAPS = ();
    my $sect = "command";

    my $fh;
    if (!open ($fh, '<', $CF{"AUTHFILE"})) {
        err_startup ($startup, "could not open $CF{AUTHFILE}: $!");
        return undef;
    }

    while (defined (my $l = <$fh>)) {
        next if ($l =~ /^\s*#/ || $l =~ /^\s*$/);
        chomp $l;
        $l =~ s/^\s*//;
        $l =~ s/\s*$//;

        if ($l =~ /^command\s+section/) {
            $sect = "command";
            next;
        } elsif ($l =~ /^trap\s+section/) {
            $sect = "trap";
            next;
        }

        if ($sect eq "command") {
            my ($cmd, $users) = split (/\s*:\s*/, $l, 2);

            if (!defined $users) {
                err_startup ($startup, "could not parse line $. of auth file\n");
                next;
            }

            foreach my $u (split (/\s*,\s*/, $users)) {
                if ($u =~ /^!(.*)/) {
                    # Directive is to "deny-user"
                    $NOAUTHCMDS{"\L$cmd"}{$1} = 1;
                } else {
                    # Directive is to "allow-user"; this includes the
                    # special name AUTH_ANY (all authenticated users)
                    $AUTHCMDS{"\L$cmd"}{$u} = 1;
                }
            }

        } elsif ($sect eq "trap") {
            if ($l !~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
                syslog ('err', "invalid entry in trap sect of $CF{AUTHFILE}, line $.");
                next;
            }
            my ($host, $user, $password) = ($1, $2, $3);

            if ($host eq "*") {
                #
                # allow traps from all hosts
                #

            } elsif ($host =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
                  || $host =~ /^[A-Z\d][A-Z\.\d\-]*[A-Z\d]+$/i) {
                #
                # dotted quad or host name: normalize it to the dotted
                # quad of the resolved address.  inet_aton returns undef
                # on failure.
                #
                my $packed = inet_aton ($host);

                if (!defined $packed || $packed eq "") {
                    syslog ('err', "invalid host in $CF{AUTHFILE}, line $.");
                    next;
                }
                $host = inet_ntoa ($packed);

            } else {
                syslog ('err', "invalid host in $CF{AUTHFILE}, line $.");
                next;
            }

            # the password is deliberately kept out of the log
            syslog ('notice', "Adding trap auth of: $host $user");
            $AUTHTRAPS{$host}{$user} = $password;

        } else {
            syslog ('err', "unknown section in $CF{AUTHFILE}: $l");
        }
    }

    close ($fh);
}
|
4126
|
|
4127
|
sub load_view_users {
    # Placeholder: intentionally does nothing and returns nothing.
    return;
}
|
4128
|
|
4129
|
sub view_match {
    #
    # Decide whether $group/$service is visible through view $view.
    #
    # Returns 1 when no view is in use ($view undefined), when the view
    # contains the group, or when it contains the "group:service" pair.
    # Returns 0 otherwise.
    #
    my ($view, $group, $service) = @_;

    # no view in use
    return 1 unless (defined ($view));

    # view contains the whole group
    return 1 if (defined ($group) && defined ($views{$view}->{$group}));

    # view contains this particular group:service
    return defined ($views{$view}->{$group.":".$service}) ? 1 : 0;
}
|
4146
|
|
4147
|
#
|
4148
|
# return undef if $user isn't permitted to perform $cmd
|
4149
|
# Optional third argument controls logging to syslog.
|
4150
|
# e.g.,
|
4151
|
# check_auth("joe", "disable")
|
4152
|
# will check to see if user joe is authorized to disable, and
|
4153
|
# complain to syslog if joe is not authorized
|
4154
|
# check_auth("joe", "disable", 1)
|
4155
|
# will check to see if user joe is authorized to disable but
|
4156
|
# NOT complain to syslog if joe is not authorized
|
4157
|
#
|
4158
|
sub check_auth {
    #
    # Return 1 if $user may perform $cmd, undef otherwise.
    #
    #   $user        authenticated user name, or undef if the client
    #                has not authenticated
    #   $cmd         command being attempted
    #   $no_syslog   when defined, the "not authenticated" refusal is
    #                not logged (an explicit denial is always logged)
    #
    # Order of evaluation:
    #   1. an authenticated user listed in %NOAUTHCMDS for the command,
    #      by name or through AUTH_ANY, is denied
    #   2. "all" in %AUTHCMDS allows any client, authenticated or not
    #   3. AUTH_ANY in %AUTHCMDS allows any authenticated user
    #   4. the user's own entry in %AUTHCMDS allows that user
    #
    my ($user, $cmd, $no_syslog) = @_;

    my $authenticated = defined ($user);

    if ($authenticated
        && ($NOAUTHCMDS{$cmd}{$user} || $NOAUTHCMDS{$cmd}{"AUTH_ANY"}))
    {
        syslog ("err", "user '$user' tried '$cmd', denied");
        return undef;
    }

    return 1 if ($AUTHCMDS{$cmd}{"all"});

    return 1
        if ($authenticated
            && ($AUTHCMDS{$cmd}{"AUTH_ANY"} || $AUTHCMDS{$cmd}{$user}));

    unless (defined ($no_syslog)) {
        syslog ("err", "user '$user' tried '$cmd', not authenticated");
    }

    return undef;
}
|
4196
|
|
4197
|
|
4198
|
#
# reload things
#
# Each argument names something to reload; only "auth" is known.
# Items are processed in order.  Returns undef at the first unknown
# item (items before it have already been reloaded), 1 otherwise.
#
sub reload {
    my (@what) = @_;

    for (@what) {
        return undef if ($_ ne "auth");
        load_auth ();
    }

    return 1;
}
|
4214
|
|
4215
|
|
4216
|
#
# report an error message: die with "$msg\n" when $startup is true,
# otherwise send $msg to syslog at priority "err"
#
sub err_startup {
    my ($startup, $msg) = @_;

    die "$msg\n" if ($startup);

    syslog ('err', $msg);
}
|
4225
|
|
4226
|
|
4227
|
#
# handle a trap
#
# Arguments:
#   $buf    the received trap text, one "tag=value" pair per line
#   $from   the sender address as returned by recv; decoded here
#           with sockaddr_in
#
# The trap is parsed into tags, authenticated against %AUTHTRAPS,
# validated, and, if accepted, handed to process_event as a "t" event
# for its group/service.  A rejected trap is logged to syslog and
# causes an early return (undef, or an empty return for an unknown
# group/service).
#
sub handle_trap {
    my ($buf, $from) = @_;

    my $time = time;
    my %trap = ();
    my $intended;

    #
    # MON-specific tags
    # pro   protocol
    # aut   auth
    # usr   username
    # pas   password
    # typ   type ("failure", "up", "startup", "trap", "traptimeout")
    # spc   specific type (STAT_OK, etc.) THIS IS NO LONGER USED
    # seq   sequence
    # grp   group
    # svc   service
    # hst   host
    # sta   status (same as exit status of a monitor)
    # tsp   timestamp as time(2) value
    # sum   summary output
    # dtl   detail
    #

    #
    # this part validates the trap
    #
    foreach my $line (split (/\n/, $buf))
    {
        if ($line =~ /^(\w+)=(.*)/)
        {
            my ($trap_name, $trap_val) = ($1, $2);
            chomp $trap_val;

            # strip one pair of enclosing single quotes
            $trap_val =~ s/^\'(.*)\'$/$1/;
            $trap{$trap_name} = un_esc_str ($trap_val);
        }

        else
        {
            syslog ('err', "unspecified tag in trap: $line");
        }
    }

    $trap{"sum"} = "$trap{sum}\n" if ($trap{"sum"} !~ /\n$/);

    my (undef, $addr) = sockaddr_in ($from);
    my $fromip = inet_ntoa ($addr);

    #
    # trap authentication
    #
    # a "*" host entry applies to every sender
    #
    my $traphost = defined ($AUTHTRAPS{"*"}) ? "*" : $fromip;

    #
    # The host is checked before anything beneath it is looked up:
    # merely evaluating $AUTHTRAPS{$traphost}{...} autovivifies
    # $AUTHTRAPS{$traphost}, which would make every sender look like a
    # configured host and add an entry to the table per source address.
    #
    if (!defined ($AUTHTRAPS{$traphost}))
    {
        syslog ('err', "received trap from unauthorized host: $fromip");
        return undef;
    }

    #
    # A "*" user entry for the host accepts any user without a
    # password.  This is decided from the configuration only; the
    # user name sent in the trap is never treated as the wildcard, so
    # a trap carrying usr=* cannot skip the password check.
    #
    if (!defined ($AUTHTRAPS{$traphost}{"*"}))
    {
        my $trapuser = defined ($trap{"usr"}) ? $trap{"usr"} : "";
        my $trappass = defined ($trap{"pas"}) ? $trap{"pas"} : "";

        if (!defined $AUTHTRAPS{$traphost}{$trapuser} ||
            crypt ($trappass, $AUTHTRAPS{$traphost}{$trapuser}) ne
                $AUTHTRAPS{$traphost}{$trapuser})
        {
            syslog ('err', "received trap from unauthorized user $trapuser, host $traphost");
            return undef;
        }
    }

    #
    # protocol version
    #
    if ($trap{"pro"} < $TRAP_PRO_VERSION)
    {
        syslog ('err', "cannot handle traps from version less than $TRAP_PRO_VERSION");
        return undef;
    }

    #
    # validate trap type
    #
    if (!defined $trap{"sta"})
    {
        syslog ('err', "no trap sta value specified from $fromip");
        return undef;
    }

    #
    # if mon receives a trap for an unknown group/service, then the
    # default/default group/service should catch these if it is defined
    #
    if (!defined $watch{$trap{"grp"}} && defined $watch{"default"})
    {
        $intended = "$trap{'grp'}:$trap{'svc'}";
        $trap{"grp"} = "default";
    }

    if ($trap{"grp"} eq 'default'
        && !defined($watch{default}->{$trap{"svc"}})
        && defined($watch{'default'}->{'default'}))
    {
        $trap{"svc"} = "default";
    }

    if (!defined ($groups{$trap{"grp"}}))
    {
        syslog ('err', "trap received for undefined group $trap{grp}");
        return;
    }

    elsif (!defined $watch{$trap{"grp"}}->{$trap{"svc"}})
    {
        syslog ('err', "trap received for undefined service type $trap{grp}/$trap{svc}");
        return;
    }

    #
    # trap has been validated, proceed
    #
    my $sref = \%{$watch{$trap{"grp"}}->{$trap{"svc"}}};

    #
    # a trap received resets the trap timeout timer
    #
    if (exists $sref->{"traptimeout"})
    {
        $sref->{"_trap_timer"} = $sref->{"traptimeout"};
    }

    $sref->{"_last_trap"} = $time;

    # remember the original group:service of a trap rerouted to default
    if ($intended)
    {
        $sref->{"_intended"} = $intended;
    }

    syslog ('info', "trap $trap{typ} $trap{spc} from " .
        "$fromip grp=$trap{grp} svc=$trap{svc}, sta=$trap{sta}\n");

    $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
        if ($sref->{"trapduration"});

    process_event ("t", $trap{"grp"}, $trap{"svc"}, $trap{"sta"}, "$trap{sum}\n$trap{dtl}");

    # _intended only applies while this trap is being processed
    if( defined($sref->{"_intended"}) )
    {
        undef($sref->{"_intended"});
    }
}
|
4409
|
|
4410
|
|
4411
|
#
# trap timeout
#
# Reloads the service's trap timer from its "traptimeout" setting and
# passes a "T" event with status 1 and a two-line "trap timeout"
# message to process_event.
#
sub handle_trap_timeout {
    my ($group, $service) = @_;

    my $tmnow = time;
    my $sref = \%{$watch{$group}->{$service}};

    $sref->{"_trap_timer"} = $sref->{"traptimeout"};

    my $when = localtime ($tmnow);
    my $detail = "trap timeout after " . $sref->{"traptimeout"} . "s at " . $when . "\n";

    process_event ("T", $group, $service, 1, "trap timeout\n" . $detail);
}
|
4426
|
|
4427
|
|
4428
|
#
# write to a socket
#
# Writes all of $buf to the filehandle $sock with syswrite, retrying
# until every byte has been written.
#
#   EAGAIN  the write is retried after a 0.1s pause
#   EINTR   the write was interrupted by a signal and is retried at once
#
# Returns 1 once everything has been written (including when $buf is
# empty), or undef on any other write error, with $! left set by
# syswrite.
#
sub sock_write {
    my ($sock, $buf) = @_;

    my $nleft = defined ($buf) ? length ($buf) : 0;
    my $offset = 0;

    while ($nleft > 0) {
        my $nwritten = syswrite ($sock, $buf, $nleft, $offset);

        if (!defined ($nwritten)) {
            next if ($! == EINTR ());
            return undef if ($! != EAGAIN ());
            usleep (100000);
            next;
        }

        $nleft -= $nwritten;
        $offset += $nwritten;
    }

    return 1;
}
|
4447
|
|
4448
|
|
4449
|
#
# do I/O processing for traps and client connections
#
# Waits with select on the trap socket, the server socket and every
# client descriptor for up to $SLEEPINT seconds in total, handling
# whatever becomes ready:
#   - a datagram on TRAPSERVER is passed to handle_trap
#   - a readable SERVER means a new connection for client_accept
#   - readable clients have their data appended to their buffer
# After each pass, pending client commands are executed.  Finally,
# clients idle for CLIENT_TIMEOUT seconds or more are closed.
#
sub handle_io {

    #
    # build iovec for server connections, traps, and clients
    #
    $iovec = '';
    my $niovec = '';
    vec ($iovec, fileno (TRAPSERVER), 1) = 1;
    vec ($iovec, fileno (SERVER), 1) = 1;
    foreach my $cl (keys %clients) {
        vec ($iovec, $cl, 1) = 1;
    }

    #
    # handle client I/O while there is some to handle
    #
    my $sleep = $SLEEPINT;
    my $tm0 = [gettimeofday];
    my $nready;
    while ($nready = select ($niovec = $iovec, undef, undef, $sleep)) {
        my $tm1 = [gettimeofday];

        if ($nready < 0)
        {
            #
            # select failed.  $! is only meaningful in this case, so
            # it is not consulted after a successful select.  An
            # interrupted call is simply retried below; anything else
            # is logged and ends the loop.
            #
            if ($! != EINTR ()) {
                syslog ('err', "select returned an error for I/O loop: $!");
                last;
            }
        }

        else
        {
            #
            # mon trap
            #
            if (vec ($niovec, fileno (TRAPSERVER), 1)) {
                my ($from, $trapbuf);
                if (!defined ($from = recv (TRAPSERVER, $trapbuf, 65536, 0))) {
                    syslog ('err', "error trying to recv a trap: $!");
                } else {
                    handle_trap ($trapbuf, $from);
                }
                next;

            #
            # client connections
            #
            } elsif (vec ($niovec, fileno (SERVER), 1)) {
                client_accept ();
            }

            #
            # read data from clients if any exists
            #
            if ($numclients) {
                foreach my $cl (keys %clients) {
                    next if (!vec ($niovec, $cl, 1));

                    my $buf = '';
                    my $nread = sysread ($clients{$cl}->{"fhandle"}, $buf, 8192);

                    #
                    # undef must be tested before 0: undef compares
                    # equal to 0 numerically, which would turn a read
                    # error into a silent close
                    #
                    if (!defined $nread) {
                        # nothing to read right now; keep the client
                        next if ($! == EAGAIN () || $! == EINTR ());
                        client_close ($cl, "read error: $!");
                    } elsif ($nread == 0) {
                        # end of file, the peer closed the connection
                        client_close ($cl);
                    } else {
                        $clients{$cl}->{"buf"} .= $buf;
                        $clients{$cl}->{"timeout"} = $CF{"CLIENT_TIMEOUT"};
                        $clients{$cl}->{"last_read"} = time;
                    }
                }
            }
        }

        #
        # execute client commands which have been read
        #
        client_dopending () if ($numclients);

        last if (tv_interval ($tm0, $tm1) >= $SLEEPINT);

        # only wait for what is left of the interval
        $sleep = $SLEEPINT - tv_interval ($tm0, $tm1);
    }

    #
    # count down client inactivity timeouts and close expired connections
    #
    if ($numclients) {
        foreach my $cl (keys %clients) {
            my $timenow = time;
            $clients{$cl}->{"timeout"} = $timenow - $clients{$cl}->{"last_read"};

            if ($clients{$cl}->{"timeout"} >= $CF{"CLIENT_TIMEOUT"}) {
                client_close ($cl, "timeout after $CF{CLIENT_TIMEOUT}s");
            }
        }
    }
}
|
4545
|
|
4546
|
|
4547
|
#
# generate alert and monitor path hashes
#
# Rebuilds %MONITORHASH and %ALERTHASH from scratch.  Every monitor
# and alert script named in %watch is looked up in the usable
# directories of $CF{SCRIPTDIR} and $CF{ALERTDIR} respectively (both
# are ":"-separated lists); the first directory holding an executable
# file of that name wins.  Unusable directories and scripts which
# cannot be found are reported to syslog.
#
sub gen_scriptdir_hash {
    my (@scriptdirs, @alertdirs);

    %MONITORHASH = ();
    %ALERTHASH = ();

    foreach my $d (split (/\s*:\s*/, $CF{"SCRIPTDIR"})) {
        if (-d $d && -x $d) {
            push (@scriptdirs, $d);
        } else {
            syslog ('err', "scriptdir $d is not usable");
        }
    }

    foreach my $d (split (/\s*:\s*/, $CF{"ALERTDIR"})) {
        if (-d $d && -x $d) {
            push (@alertdirs, $d);
        } else {
            syslog ('err', "alertdir $d is not usable");
        }
    }

    #
    # record the path of $script in %$hash, taken from the first of
    # @$dirs holding an executable of that name.  An existing entry is
    # left alone.  Returns 1 if the script was found, 0 if not.
    #
    my $locate = sub {
        my ($hash, $dirs, $script) = @_;

        foreach my $dir (@{$dirs}) {
            next unless (-x "$dir/$script");
            $hash->{$script} = "$dir/$script"
                unless (defined $hash->{$script});
            return 1;
        }

        return 0;
    };

    #
    # monitors
    #
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            my $sref = \%{$watch{$group}->{$service}};
            next if (!defined $sref->{"monitor"});

            # the script is the first word of the monitor line
            my $monitor = (split (/\s+/, $sref->{"monitor"}))[0];

            if (!$locate->(\%MONITORHASH, \@scriptdirs, $monitor)) {
                syslog ('err', "$monitor not found in one of (\@scriptdirs[@scriptdirs])");
            }
        }
    }

    #
    # alerts
    #
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            my $sref = \%{$watch{$group}->{$service}};

            if ($sref->{"redistribute"} ne '') {
                my $alert = $sref->{"redistribute"};

                if (!$locate->(\%ALERTHASH, \@alertdirs, $alert)) {
                    syslog ('err', "$alert not found in one of (\@alertdirs[@alertdirs])");
                }
            }

            foreach my $period (keys %{$sref->{"periods"}}) {
                my $pref = \%{$sref->{"periods"}->{$period}};

                foreach my $my_alert (
                    @{$pref->{"alerts"}},
                    @{$pref->{"upalerts"}},
                    @{$pref->{"startupalerts"}},
                    @{$pref->{"ackalerts"}},
                    @{$pref->{"disablealerts"}},
                ) {
                    # skip leading var=value words, keep the script name
                    my $alert = $my_alert;
                    $alert =~ s/^(\S+=\S+ )*(\S+).*$/$2/;

                    if (!$locate->(\%ALERTHASH, \@alertdirs, $alert)) {
                        syslog ('err', "$alert not found in one of (\@alertdirs[@alertdirs])");
                    }
                }
            }
        }
    }
}
|
4640
|
|
4641
|
|
4642
|
#
# do some processing on dirs
#
# Turns the relative paths held in %CF into absolute ones and logs
# (at "err") every path which does not exist:
#   STATEDIR, LOGDIR        relative to BASEDIR, must be directories
#   AUTHFILE, USERFILE      relative to CFBASEDIR, must be plain files
#                           (USERFILE only for the "userfile" authtype)
#   DTLOGFILE, HISTORICFILE relative to LOGDIR, not checked
#   SCRIPTDIR, ALERTDIR     ":"-separated lists, each element relative
#                           to BASEDIR, trailing "/" removed
#
sub normalize_paths {

    #
    # do some sanity checks on dirs
    #
    foreach my $key ("STATEDIR", "LOGDIR") {
        $CF{$key} = "$CF{BASEDIR}/$CF{$key}" unless ($CF{$key} =~ m{^/});
        syslog ('err', "$CF{$key} does not exist") unless (-d $CF{$key});
    }

    unless ($CF{"AUTHFILE"} =~ m{^/}) {
        $CF{"AUTHFILE"} = "$CF{CFBASEDIR}/$CF{AUTHFILE}";
    }
    unless (-f $CF{"AUTHFILE"}) {
        syslog ('err', "$CF{AUTHFILE} does not exist");
    }

    foreach my $authtype (split (' ', $CF{"AUTHTYPE"})) {
        next unless ($authtype eq "userfile");

        unless ($CF{"USERFILE"} =~ m{^/}) {
            $CF{"USERFILE"} = "$CF{CFBASEDIR}/$CF{USERFILE}";
        }
        unless (-f $CF{"USERFILE"}) {
            syslog ('err', "$CF{USERFILE} does not exist");
        }
    }

    unless ($CF{"DTLOGFILE"} =~ m{^/}) {
        $CF{"DTLOGFILE"} = "$CF{LOGDIR}/$CF{DTLOGFILE}";
    }

    if ($CF{"HISTORICFILE"} ne "" && $CF{"HISTORICFILE"} !~ m{^/}) {
        $CF{"HISTORICFILE"} = "$CF{LOGDIR}/$CF{HISTORICFILE}";
    }

    #
    # script and alert dirs may have multiple paths
    #
    foreach my $key ("SCRIPTDIR", "ALERTDIR") {
        my @normalized;

        foreach my $d (split (/\s*:\s*/, $CF{$key})) {
            $d =~ s{/$}{};
            $d = "$CF{BASEDIR}/$d" unless ($d =~ m{^/});

            if (! -d $d) {
                syslog ('err', "$d does not exist, check your alertdir and mondir paths");
            }

            push @normalized, $d;
        }

        $CF{$key} = join (":", @normalized);
    }
}
|
4697
|
|
4698
|
|
4699
|
#
# set opstatus and save old status
#
# The service's current "_op_status" is copied to "_last_op_status"
# before being replaced by $status.
#
sub set_op_status {
    my ($group, $service, $status) = @_;

    my $sref = \%{$watch{$group}->{$service}};

    $sref->{"_last_op_status"} = $sref->{"_op_status"};
    $sref->{"_op_status"} = $status;
}
|
4709
|
|
4710
|
|
4711
|
#
# print the path-related settings of %CF to STDERR, followed by one
# "M name=[path]" line per %MONITORHASH entry and one "A name=[path]"
# line per %ALERTHASH entry
#
sub debug_dir {
    print STDERR <<EOF;
basedir [$CF{BASEDIR}]
cfbasedir [$CF{CFBASEDIR}]

cf [$CF{CF}]
statedir [$CF{STATEDIR}]
logdir [$CF{LOGDIR}]
authfile [$CF{AUTHFILE}]
userfile [$CF{USERFILE}]
dtlogfile [$CF{DTLOGFILE}]
historicfile[$CF{HISTORICFILE}]
monerrfile [$CF{MONERRFILE}]
scriptdir [$CF{SCRIPTDIR}]
alertdir [$CF{ALERTDIR}]
EOF

    for my $monitor (keys %MONITORHASH) {
        print STDERR "M $monitor=[$MONITORHASH{$monitor}]\n";
    }

    for my $alert (keys %ALERTHASH) {
        print STDERR "A $alert=[$ALERTHASH{$alert}]\n";
    }
}
|
4735
|
|
4736
|
|
4737
|
#
# globals affected by config file are
# all stored in %CF
#
# This sets every %CF entry to its default.  The command line options
# in %opt take precedence for BASEDIR (b), CFBASEDIR (B), CF (c),
# LOGDIR (L) and SYSLOG_FACILITY (O).  A relative CF path is taken
# relative to $PWD.
#
sub init_cf_globals {
    $CF{"BASEDIR"} = $opt{"b"} || "/usr/lib/mon";
    $CF{"BASEDIR"} =~ s{/$}{};
    $CF{"CFBASEDIR"} = $opt{"B"} || "/etc/mon";
    $CF{"CF"} = $opt{"c"} || "$CF{CFBASEDIR}/mon.cf";
    $CF{"CF"} = "$PWD/$CF{CF}" if ($CF{"CF"} !~ /^\//);
    $CF{"SCRIPTDIR"} = "/usr/local/lib/mon/mon.d:mon.d";
    $CF{"ALERTDIR"} = "/usr/local/lib/mon/alert.d:alert.d";

    # the first of these directories which exists is used
    $CF{"LOGDIR"} = $opt{"L"} || (-d "/var/log/mon" ? "/var/log/mon" : "log.d");
    $CF{"STATEDIR"} = -d "/var/state/mon" ? "/var/state/mon"
        : -d "/var/lib/mon" ? "/var/lib/mon"
        : "state.d";

    $CF{"AUTHFILE"} = "auth.cf";
    $CF{"AUTHTYPE"} = "getpwnam";
    $CF{"PAMSERVICE"} = "passwd";
    $CF{"USERFILE"} = "monusers.cf";
    $CF{"PIDFILE"} = (-d "/var/run/mon" ? "/var/run/mon"
        : -d "/var/run" ? "/var/run"
        : "/etc") . "/mon.pid";
    $CF{"MONERRFILE"} = "/dev/null";
    $CF{"DTLOGFILE"} = "downtime.log";
    $CF{"DTLOGGING"} = 0;
    $CF{"MAX_KEEP"} = 100;
    $CF{"CLIENT_TIMEOUT"} = 30;

    # the "mon" service entry, if there is one, else port 2583
    $CF{"SERVPORT"} = getservbyname ("mon", "tcp") || 2583;
    $CF{"TRAPPORT"} = getservbyname ("mon", "udp") || 2583;

    #
    # a regular expression for four dot-separated runs of digits.
    # The dots are escaped so that they only match a literal "."
    # rather than any character.
    #
    $CF{"CLIENTALLOW"} = '\d+\.\d+\.\d+\.\d+';

    $CF{"MAXPROCS"} = 0;
    $CF{"HISTORICFILE"} = "";
    $CF{"HISTORICTIME"} = 0;
    $CF{"DEP_RECUR_LIMIT"} = 10;
    $CF{"SYSLOG_FACILITY"} = $opt{"O"} || "daemon";
    $CF{"STARTUPALERTS_ON_RESET"} = 0;
    $CF{"MONREMOTE"} = undef;
}
|
4776
|
|
4777
|
|
4778
|
#
# globals not affected by config file
#
# Initializes the protocol versions, the main loop sleep interval,
# the stopped/start time bookkeeping, the host name and working
# directory, the $FL_* flag bits, the $TRAP_* and $STAT_* numbering,
# the status classification hashes and the (empty) script lookup
# hashes.
#
sub init_globals {
    $TRAP_PRO_VERSION = 0.3807;
    $SLEEPINT = 1;
    ($STOPPED, $STOPPED_TIME) = (0, 0);
    $START_TIME = time;
    $PROT_VERSION = 0x2611;
    $HOSTNAME = hostname ();
    $PWD = getcwd ();

    #
    # flags, one bit each: 1, 2, 4, ... 256
    #
    (
        $FL_MONITOR,
        $FL_UPALERT,
        $FL_TRAP,
        $FL_TRAPTIMEOUT,
        $FL_STARTUPALERT,
        $FL_TEST,
        $FL_REDISTRIBUTE,
        $FL_ACKALERT,
        $FL_DISABLEALERT,
    ) = map { 1 << $_ } (0 .. 8);

    #
    # specific trap types
    #
    (
        $TRAP_COLDSTART,
        $TRAP_WARMSTART,
        $TRAP_LINKDOWN,
        $TRAP_LINKUP,
        $TRAP_AUTHFAIL,
        $TRAP_EGPNEIGHBORLOSS,
        $TRAP_ENTERPRISE,
        $TRAP_HEARTBEAT,
    ) = (0 .. 7);

    #
    # operational statuses
    #
    (
        $STAT_FAIL,
        $STAT_OK,
        $STAT_COLDSTART,
        $STAT_WARMSTART,
        $STAT_LINKDOWN,
        $STAT_UNKNOWN,
        $STAT_TIMEOUT,
        $STAT_UNTESTED,
        $STAT_DEPEND,
        $STAT_WARN,
    ) = (0 .. 9);

    #
    # classification of the statuses, keyed by status number
    #
    %FAILURE = map { $_ => 1 }
        ($STAT_FAIL, $STAT_LINKDOWN, $STAT_TIMEOUT);

    %SUCCESS = map { $_ => 1 }
        ($STAT_OK, $STAT_COLDSTART, $STAT_WARMSTART, $STAT_UNKNOWN, $STAT_UNTESTED);

    %WARNING = map { $_ => 1 }
        ($STAT_COLDSTART, $STAT_WARMSTART, $STAT_UNKNOWN, $STAT_WARN);

    #
    # status name to status number
    #
    %OPSTAT = (
        "fail"      => $STAT_FAIL,
        "ok"        => $STAT_OK,
        "coldstart" => $STAT_COLDSTART,
        "warmstart" => $STAT_WARMSTART,
        "linkdown"  => $STAT_LINKDOWN,
        "unknown"   => $STAT_UNKNOWN,
        "timeout"   => $STAT_TIMEOUT,
        "untested"  => $STAT_UNTESTED,
    );

    #
    # fast lookup hashes for alerts and monitors
    #
    %MONITORHASH = ();
    %ALERTHASH = ();
}
|
4848
|
|
4849
|
|
4850
|
#
# clear timers
#
# Resets the timers and failure counters of $group/$service.  Each
# value is only touched when the setting (or counter) it depends on
# is true.  Returns undef if the service is not defined.
#
sub clear_timers {
    my ($group, $service) = @_;

    return undef unless (defined $watch{$group}->{$service});

    my $sref = \%{$watch{$group}->{$service}};

    # timers are reloaded from their configured values
    foreach my $reload (
        ["_trap_timer",          "traptimeout"],
        ["_trap_duration_timer", "trapduration"],
        ["_timer",               "interval"],
    ) {
        my ($timer, $setting) = @{$reload};
        $sref->{$timer} = $sref->{$setting} if ($sref->{$setting});
    }

    if ($sref->{"_consec_failures"}) {
        $sref->{"_consec_failures"} = 0;
    }

    # per-period alert bookkeeping
    foreach my $period (keys %{$sref->{"periods"}}) {
        my $pref = \%{$sref->{"periods"}->{$period}};

        if ($pref->{"alertevery"}) {
            $pref->{"_last_alert"} = 0;
        }

        if ($pref->{"alertafter_consec"}) {
            $pref->{"_consec_failures"} = 0;
        }

        if ($pref->{"alertafterival"}) {
            $pref->{'_1stfailtime'} = 0;
        }
    }
}
|
4885
|
|
4886
|
|
4887
|
#
# load some amount of the alert history into memory
#
# Replaces @last_alerts with the lines of $CF{HISTORICFILE}, skipping
# blank lines and "#" comments.  If $CF{HISTORICTIME} is non-zero,
# only entries whose fourth whitespace-separated field (the alert's
# epoch time) lies within the last HISTORICTIME seconds are kept.  If
# $CF{MAX_KEEP} is defined, only the newest MAX_KEEP entries are kept.
#
# Does nothing when no history file is configured; an unreadable file
# is reported to syslog.
#
sub readhistoricfile {
    return if ($CF{"HISTORICFILE"} eq "");

    # three-argument open: the file name can never be taken as a mode
    # or a command
    my $histfh;
    if (!open ($histfh, '<', $CF{"HISTORICFILE"})) {
        syslog ('err', "Could not read history from $CF{HISTORICFILE} : $!");
        return;
    }

    my $epochLimit = 0;
    if ($CF{"HISTORICTIME"} != 0) {
        $epochLimit = time - $CF{"HISTORICTIME"};
    }

    @last_alerts = ();

    while (my $line = <$histfh>) {
        next if ($line =~ /^\s*$/ || $line =~ /^\s*#/);
        chomp $line;
        my $epochAlert = (split (/\s+/, $line))[3];
        push (@last_alerts, $line) if ($epochAlert >= $epochLimit);
    }

    close ($histfh);

    if (defined $CF{"MAX_KEEP"}) {
        #
        # Only trim when there really are too many entries.  A
        # negative LENGTH has a special meaning to splice (keep that
        # many elements at the end), so a short history must not be
        # passed through it.
        #
        my $excess = scalar (@last_alerts) - $CF{"MAX_KEEP"};
        splice (@last_alerts, 0, $excess) if ($excess > 0);
    }
}
|
4918
|
|
4919
|
|
4920
|
#
# This routine simply calls an alert.
#
# call with %args = (
#   group => "name of group",
#   service => "name of service",
#   pref => "optional period reference",
#   alert => "alert script",
#   args => "args to alert script",
#   flags => "flags, as in $FL_*",
#   retval => "return value of monitor",
#   output => "output of monitor",
# )
#
# The alert script is run in a grandchild process with the MON_*
# environment variables set, and receives "output" on its standard
# input.  An intermediate child does the writing so that a slow alert
# cannot block the caller.
#
# Returns undef if a mandatory argument is missing, the alert script
# is not known, or the fork fails; 1 otherwise.  Test alerts and
# redistributions are not tallied or recorded in the alert history.
#
sub call_alert {
    my (%args) = @_;

    foreach my $mandatory_arg (qw(group service flags
                                  retval alert output)) {
        if (!exists $args{$mandatory_arg})
        {
            debug (1, "returning from call_alert because of missing arg $mandatory_arg\n");
            return (undef);
        }
    }

    # group members whose name starts with "*" are not passed on
    my @groupargs = grep (!/^\*/, @{$groups{$args{"group"}}});

    my $tmnow = time;
    my ($summary) = split("\n", $args{"output"});
    $summary = "(NO SUMMARY)" if (!defined $summary || $summary =~ /^\s*$/m);

    my $sref = \%{$watch{$args{"group"}}->{$args{"service"}}};
    my $pref;

    if (defined $args{"pref"}) {
        $pref = $args{"pref"};
    }

    if (! defined $args{"args"}) {
        $args{"args"} = '';
    }

    if (!defined $ALERTHASH{$args{"alert"}} ||
        ! -f $ALERTHASH{$args{"alert"}}) {
        syslog ('err', "no alert found while trying to run $args{alert}");
        return undef;
    }
    my $alert = $ALERTHASH{$args{"alert"}};

    my $alerttype = "";             # sent to syslog and stored in @last_alerts
    my $alert_type = "failure";     # MON_ALERTTYPE set to this
    if ($args{"flags"} & $FL_UPALERT) {
        $alerttype = "upalert";
        $alert_type = "up";
    } elsif ($args{"flags"} & $FL_STARTUPALERT) {
        $alerttype = "startupalert";
        $alert_type = "startup";
    } elsif ($args{"flags"} & $FL_ACKALERT) {
        $alerttype = "ackalert";
        $alert_type = "ack";
    } elsif ($args{"flags"} & $FL_DISABLEALERT) {
        $alerttype = "disablealert";
        $alert_type = "disable";
    } elsif ($args{"flags"} & $FL_TRAPTIMEOUT) {
        $alerttype = "traptimeoutalert";
        $alert_type = "traptimeout";
    } elsif ($args{"flags"} & $FL_TRAP) {
        $alerttype = "trapalert";
        $alert_type = "trap";
    } elsif ($args{"flags"} & $FL_TEST) {
        $alerttype = "testalert";
        $alert_type = "test";
    } else {
        $alerttype = "alert";
    }

    #
    # log why we are triggering an alert
    #
    my $alertname = $alert;
    $alertname =~ s{^.*/([^/]+)$}{$1};
    syslog ("alert", "calling $alerttype $alertname for" .
        " $args{group}/$args{service} ($alert,$args{args}) $summary") if (!($args{"flags"} & $FL_REDISTRIBUTE));

    # We may block while writing to the alert script, so we'll fork first, allowing the
    # master process to move on.

    my $pid = fork ();
    if (!defined $pid) {
        # nothing was started, so the alert must not be tallied as sent
        syslog ('err', "could not fork to run alert $alert: $!");
        return undef;
    }

    if ($pid == 0) {    ## Child
        #
        # From here on this process must never return to the caller:
        # doing so would leave a second copy of the daemon running.
        # Every path below ends in exec or exit.
        #
        my $alert_fh;
        my $alertpid = open ($alert_fh, "|-");
        if (!defined $alertpid) {
            syslog ('err', "could not fork: $!");
            exit (1);
        }

        #
        # grandchild, the actual alert
        #
        if ($alertpid == 0) {
            #
            # set env variables to pass to the alert
            #
            foreach my $v (keys %{$sref->{"ENV"}}) {
                $ENV{$v} = $sref->{"ENV"}->{$v};
            }

            # each of these is only set when the service has the value
            foreach my $pair (
                [MON_LAST_SUMMARY  => "_last_summary"],
                [MON_LAST_OUTPUT   => "_last_output"],
                [MON_LAST_FAILURE  => "_last_failure"],
                [MON_FIRST_FAILURE => "_first_failure"],
                [MON_FIRST_SUCCESS => "_first_success"],
                [MON_LAST_SUCCESS  => "_last_success"],
                [MON_DESCRIPTION   => "description"],
                [MON_OPSTATUS      => "_op_status"],
                [MON_LAST_OPSTATUS => "_last_op_status"],
            ) {
                my ($name, $key) = @{$pair};
                $ENV{$name} = $sref->{$key} if (defined $sref->{$key});
            }

            $ENV{"MON_GROUP"} = $args{"group"} if (defined $args{"group"});
            $ENV{"MON_SERVICE"} = $args{"service"} if (defined $args{"service"});
            $ENV{"MON_RETVAL"} = $args{"retval"} if (defined $args{"retval"});
            $ENV{"MON_ACK"} = $sref->{"_ack_comment"} if ($sref->{"_ack"} && $sref->{"_ack_comment"} ne "");
            $ENV{"MON_ALERTTYPE"} = $alert_type;
            $ENV{"MON_STATEDIR"} = $CF{"STATEDIR"};
            $ENV{"MON_LOGDIR"} = $CF{"LOGDIR"};
            $ENV{"MON_CFBASEDIR"} = $CF{"CFBASEDIR"};

            if( defined($sref->{"_intended"}) )
            {
                $ENV{"MON_TRAP_INTENDED"} = $sref->{"_intended"};
            }

            else
            {
                # remove the variable rather than passing it on empty
                delete ($ENV{"MON_TRAP_INTENDED"});
            }

            # when several flags are set, the last matching one wins
            my $t;
            $t = "-u" if ($args{"flags"} & $FL_UPALERT);
            $t = "-a" if ($args{"flags"} & $FL_ACKALERT);
            $t = "-D" if ($args{"flags"} & $FL_DISABLEALERT);
            $t = "-T" if ($args{"flags"} & $FL_TRAP);
            $t = "-O" if ($args{"flags"} & $FL_TRAPTIMEOUT);

            my @execargs = (
                $alert,
                "-s", "$args{service}",
                "-g", "$args{group}",
                "-h", "@groupargs",
                "-t", "$tmnow",
            );

            if ($t) {
                push @execargs, $t;
            }

            if ($args{"args"} ne "") {
                push @execargs, quotewords('\s+',0,$args{"args"});
            }

            # exec only returns on failure
            exec (@execargs)
                or syslog ('err', "could not exec alert $alert: $!");
            exit (1);
        }

        #
        # this will block if the alert is sucking gas, which is why we forked above
        #
        print $alert_fh $args{"output"};
        close ($alert_fh);
        exit;
    }

    #
    # test alerts and redistributions don't count
    #
    return (1) if ($args{"flags"} & ($FL_TEST | $FL_REDISTRIBUTE));

    #
    # tally this alert
    #
    if (defined $args{"pref"}) {
        $pref->{"_last_alert"} = $tmnow;
    }
    $sref->{"_alert_count"}++;

    #
    # store this in the log
    #
    shift @last_alerts if (@last_alerts > $CF{"MAX_KEEP"});

    my $alertline = "$alerttype $args{group} $args{service}" .
        " $tmnow $alert ($args{args}) $summary";
    push @last_alerts, $alertline;

    #
    # append to alert history file
    #
    if ($CF{"HISTORICFILE"} ne "") {
        my $histfh;
        if (!open ($histfh, '>>', $CF{"HISTORICFILE"})) {
            syslog ('err', "Could not append alert history to $CF{HISTORICFILE}: $!");
        } else {
            print $histfh $alertline, "\n";
            close ($histfh);
        }
    }

    return 1;
}
|
5134
|
|
5135
|
|
5136
|
#
# recursively evaluate a dependency expression
# substitutes "GROUP:SERVICE" with "1" or "0" if the service is pass/fail, resp.
#
# Arguments:
#   $depend   the dependency expression
#   $depth    current recursion depth, 0 for the outermost call
#   $deptype  dependency type, compared with each service's
#             "dep_behavior"; 'a' and 'm' also select the
#             "alertdepend" and "monitordepend" expressions
#
# returns an anonymous hash reference
#
# {
#   status =>,  # "D" recursion depth exceeded
#               # "O" everything is OK
#               # "E" eval error
#   depend =>,  # 1 for success (no deps in a failure state)
#               # 0 if any deps failed
#   error  =>,  # the textual error associated with "D" or "E" status
# }
#
sub depend {
    my ($depend, $depth, $deptype) = @_;
    debug (2, "checking DEP [$depend]\n");

    if ($depth > $CF{"DEP_RECUR_LIMIT"}) {
        return {
            status => "D",
            depend => undef,
            error => "recursion too deep for ($depend)",
        };
    }

    foreach my $depstr ($depend =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g)
    {
        my ($group ,$service) = split(':', $depstr);

        my $sref = \%{$watch{$group}->{$service}};
        my $depval = undef;

        # the expression this dependency itself depends upon, if any
        my $subdepend = "";
        if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq $deptype) {
            $subdepend = $sref->{"depend"};
        } elsif ($deptype eq 'a' && defined $sref->{"alertdepend"}) {
            $subdepend = $sref->{"alertdepend"};
        } elsif ($deptype eq 'm' && defined $sref->{"monitordepend"}) {
            $subdepend = $sref->{"monitordepend"};
        }

        #
        # disabled watches and services used to be counted as "passing"
        # now we'll use the actual values, to avoid having dependent services
        # alert when a broken service gets disabled
        #

        #
        # root dependency found
        #
        if ($subdepend eq "")
        {
            debug (2, "  found root dep $group,$service\n");

            # passing means a success status and no failure within
            # the last dep_memory seconds
            $depval = $SUCCESS{$sref->{"_op_status"}} && ($sref->{"_last_failure_time"} < (time - $sref->{"dep_memory"}));
        }

        #
        # not a root dep, recurse
        #
        else
        {
            #
            # do it recursively
            #
            my $dstatus = depend ($subdepend, $depth + 1, $deptype);
            debug (2,
                "recur depth $depth returned $dstatus->{status},$dstatus->{depend}\n");

            #
            # a bad thing happened, bail out
            #
            if ($dstatus->{"status"} ne "O")
            {
                debug (2,
                    "recursive dep failure for $group,$service (status=$dstatus->{status})\n");
                return $dstatus;
            }

            $depval = $dstatus->{"depend"} && $SUCCESS{$sref->{"_op_status"}}
                && ($sref->{"_last_failure_time"} < (time - $sref->{"dep_memory"}));
        }

        my $v = int ($depval);
        debug (2, "  ($group,$service) $depth depend=[$v][$depend]");

        #
        # \Q...\E: names may contain "." and "-", which must be matched
        # literally rather than as regular expression syntax
        #
        $depend =~ s/\b\Q$depstr\E\b/$v/g;
        debug (2, "  depend=[$depend]\n");
    }

    debug (2, "  before eval: [$depend]");

    #
    # NOTE(review): what is left of the expression is evaluated as
    # Perl code, prefixed by $DEP_EVAL_SANDBOX.  The expression comes
    # from the service's dependency settings; it must never be built
    # from untrusted input.
    #
    my $e = eval("$DEP_EVAL_SANDBOX $depend");
    debug (2, "  after eval: [$e]\n");

    if ($@ eq "")
    {
        return
        {
            status => "O",
            depend => $e,
        };

    }
    else
    {
        return
        {
            status => "E",
            depend => $e,
            error => $@,
        };
    }
}
|
5254
|
|
5255
|
|
5256
|
#
# check the dependencies of the service $sref for dependency type
# $deptype
#
# returns undef on error
#         0 if dependency failure, sets _depend_status to 0
#         1 if dependencies are OK, sets _depend_status to 1
#         1 if there is no dependency expression to check, leaving
#           _depend_status untouched
#
sub dep_ok
{
    my ($sref, $deptype) = @_;

    # pick the expression which applies to this dependency type
    my $depend = "";
    if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq $deptype) {
        $depend = $sref->{"depend"};
    } elsif ($deptype eq 'a' && defined $sref->{"alertdepend"}) {
        $depend = $sref->{"alertdepend"};
    } elsif ($deptype eq 'm' && defined $sref->{"monitordepend"}) {
        $depend = $sref->{"monitordepend"};
    }

    return 1 if ($depend eq "");

    my $result = depend ($depend, 0, $deptype);
    my $status = $result->{"status"};

    if ($status eq "D")
    {
        debug (2, "dep recursion too deep\n");
        return undef;
    }

    if ($status eq "E")
    {
        syslog ("notice", "eval error for dependency starting at $depend: ".$result->{error});
        return undef;
    }

    my $ok = ($status eq "O" && !$result->{"depend"}) ? 0 : 1;
    $sref->{"_depend_status"} = $ok;

    return $ok;
}
|
5299
|
|
5300
|
|
5301
|
#
# returns undef on error (a dependency names a group/service which is
# not in %watch), otherwise a reference to a list of summaries from all
# DIRECT dependencies currently failing, or which failed within their
# dep_memory window.  The dependencies are taken from "depend" when
# dep_behavior is "hm", else from "hostdepend".
#
sub dep_summary
{
    my ($sref) = @_;

    my $dep_re = qr/[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/;
    my @deps = ();

    if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq "hm") {
        @deps = ($sref->{"depend"} =~ /$dep_re/g);
    } elsif (defined $sref->{"hostdepend"}) {
        @deps = ($sref->{"hostdepend"} =~ /$dep_re/g);
    }

    return [] unless (@deps);

    my @sum;

    foreach my $dep (@deps) {
        my ($group, $service) = split (/:/, $dep);

        unless (exists $watch{$group} && exists $watch{$group}->{$service}) {
            return undef;
        }

        my $dref = \%{$watch{$group}->{$service}};

        if ($dref->{"_op_status"} == $STAT_FAIL) {
            # failing right now
            push @sum, $dref->{"_last_summary"};
        } elsif ($dref->{"_last_failure_time"} >= (time - $dref->{"dep_memory"})) {
            # failed recently enough to still be remembered
            push @sum, $dref->{"_last_failure_summary"};
        }
    }

    return \@sum;
}
|
5334
|
|
5335
|
#
# convert a string to a hex-escaped string, returning
# the escaped string.
#
# $str is the string to be escaped; undef yields the empty string.
#
# Every character with a code of 32 or less, or greater than 126,
# as well as the double and single quote characters, is replaced
# by a backslash followed by its code in lowercase hex (at least
# two digits).
#
# if $inquotes is true, backslashes are doubled, making
# the escaped string suitable to be enclosed in
# single quotes and later passed to quotewords
# (Text::ParseWords). For example, var='quoted value'
#
sub esc_str {
    my ($str, $inquotes) = @_;

    return "" unless (defined $str);

    my @pieces;

    foreach my $ch (split (//, $str))
    {
        my $code = ord ($ch);

        if ($code <= 32 || $code > 126 || $ch eq "\"" || $ch eq "\'")
        {
            push @pieces, sprintf ("\\%02x", $code);
        }
        elsif ($inquotes && $ch eq "\\")
        {
            push @pieces, "\\\\";
        }
        else
        {
            push @pieces, $ch;
        }
    }

    return join ("", @pieces);
}
|
5374
|
|
5375
|
|
5376
|
#
# convert a hex-escaped string into an unescaped string,
# returning the unescaped string
#
# Each backslash followed by exactly two lowercase hex digits is
# replaced by the character with that code; all other text is
# passed through unchanged.
#
sub un_esc_str {
    my ($escaped) = @_;

    $escaped =~ s/\\([0-9a-f]{2})/chr (hex ($1))/ge;

    return $escaped;
}
|
5387
|
|
5388
|
|
5389
|
#
# Log $msg to syslog at priority "err", then die with the same
# message. A newline is appended to the die text, so perl does not
# add the file and line number.
#
sub syslog_die {
    my ($msg) = @_;

    syslog ("err", $msg);

    die $msg . "\n";
}
|
5395
|
|
5396
|
no warnings; # Redefining syslog
#
# Best-effort replacement for the imported syslog(): forwards its
# arguments (priority first, then the message) to Sys::Syslog::syslog.
#
# Percent signs are removed from every argument before forwarding.
# The removal is done on copies: map must yield the cleaned string
# (s/// itself evaluates to the substitution count, not the string),
# and the caller's variables, which are aliased through @_, must not
# be modified.
#
# Any exception raised while logging is trapped by the eval and
# discarded, and __DIE__ handlers are silenced for the duration, so
# a logging failure never takes the caller down. Returns whatever
# the eval block yields.
#
sub syslog {
    eval {
        local $SIG{"__DIE__"}= sub { };
        my @log = map { (my $arg = $_) =~ s/\%//g; $arg } @_;
        Sys::Syslog::syslog(@log);
    }
}
use warnings;
|
5405
|
|
5406
|
#
# Have a "conversation" with a PAM authentication module. This fools the
# PAM module into authenticating us non-interactively.
#
# The arguments are consumed as (code, message) pairs. For each pair the
# result list receives PAM_SUCCESS followed by an answer: $PAM_username
# for a PAM_PROMPT_ECHO_ON code, $PAM_password for a PAM_PROMPT_ECHO_OFF
# code, and the empty string for anything else. The message text is not
# examined. A final PAM_SUCCESS is appended after all pairs.
#
sub pam_conv_func {
    my @res;

    while (my ($code, $msg) = splice (@_, 0, 2))
    {
        my $ans = "";

        if ($code == Authen::PAM::PAM_PROMPT_ECHO_ON())
        {
            $ans = $PAM_username;
        }

        if ($code == Authen::PAM::PAM_PROMPT_ECHO_OFF())
        {
            $ans = $PAM_password;
        }

        push @res, Authen::PAM::PAM_SUCCESS(), $ans;
    }

    push @res, Authen::PAM::PAM_SUCCESS();

    return @res;
}
|
5426
|
|
5427
|
|
5428
|
#
# Append one record for a service to the downtime log $CF{DTLOGFILE}.
#
# $sref is the service hash, $group and $service are its names.
#
# A "_first_failure" of 0 is replaced by $START_TIME before the record
# is built. The record is a single line of space-separated fields:
#
#   current time, group, service, first failure time,
#   seconds elapsed since the first failure, service interval,
#   last summary
#
# $CF{DTLOGGING} is set to 0 if the log cannot be opened and to 1 if
# it can. Open, write and close errors are reported through syslog.
#
sub write_dtlog
{
    my ($sref, $group, $service) = @_;

    my $tmnow = time;

    $sref->{"_first_failure"} = $START_TIME
        if ($sref->{"_first_failure"} == 0);

    # Three-argument open with a lexical handle: the configured file
    # name is never parsed for a mode or for surrounding whitespace,
    # and the handle cannot collide with another DTLOG user.
    my $dtlog;

    if (!open ($dtlog, '>>', $CF{"DTLOGFILE"}))
    {
        syslog ('err', "could not append to $CF{DTLOGFILE}: $!");
        $CF{"DTLOGGING"} = 0;
    }

    else
    {
        $CF{"DTLOGGING"} = 1;

        my $record = join (" ",
            $tmnow,
            $group,
            $service,
            0 + $sref->{"_first_failure"},
            0 + $tmnow - $sref->{"_first_failure"},
            0 + $sref->{'interval'},
            $sref->{'_last_summary'});

        print {$dtlog} "$record\n" or
            syslog ('err', "error writing to $CF{DTLOGFILE}: $!");

        # output is buffered, so a failed write (e.g. a full disk)
        # may only be reported when the handle is closed
        close ($dtlog) or
            syslog ('err', "error closing $CF{DTLOGFILE}: $!");
    }
}
|
5457
|
|
5458
|
# Perl's "system" function blocks. We don't want the mon process to
# ever block. So we fork, and the child calls system. The original
# author notes that mon handles the child process cleanup elsewhere.
#
# The parent returns right after a successful fork. The child runs
# system(@args) and then exits with status 0, whatever the command
# returned. If the fork fails, a message is printed to STDERR and the
# sub falls through to its final trace message.
sub mysystem {
    my @args = @_;

    print STDERR "mysystem called: @args\n";

    my $pid = fork();

    if (!defined($pid)) {       ## parent - fork failed
        print STDERR "You lose!\n";
    } elsif ($pid == 0) {       ## child
        system(@args);
        exit(0);
    } else {                    ## parent
        return;
    }

    print STDERR "mysystem returning\n";
}
|