1 |
95b003ff
|
Origo
|
#!/usr/bin/perl
|
2 |
|
|
#
|
3 |
|
|
# mon - schedules service tests and triggers alerts upon failures
|
4 |
|
|
#
|
5 |
|
|
# Jim Trocki, trockij@arctic.org
|
6 |
|
|
#
|
7 |
|
|
# $Id: mon.pl,v 1.1 2012-10-23 19:57:32 cabo Exp $
|
8 |
|
|
#
|
9 |
|
|
# Copyright (C) 1998 Jim Trocki
|
10 |
|
|
#
|
11 |
|
|
# This program is free software; you can redistribute it and/or modify
|
12 |
|
|
# it under the terms of the GNU General Public License as published by
|
13 |
|
|
# the Free Software Foundation; either version 2 of the License, or
|
14 |
|
|
# (at your option) any later version.
|
15 |
|
|
#
|
16 |
|
|
# This program is distributed in the hope that it will be useful,
|
17 |
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18 |
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19 |
|
|
# GNU General Public License for more details.
|
20 |
|
|
#
|
21 |
|
|
# You should have received a copy of the GNU General Public License
|
22 |
|
|
# along with this program; if not, write to the Free Software
|
23 |
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
24 |
|
|
#
|
25 |
|
|
#
|
26 |
|
|
use strict;
|
27 |
|
|
|
28 |
|
|
#
# identity strings; $RCSID and $RELEASE are what the -v option prints
#
my ($RCSID, $AUTHOR, $RELEASE) = (
    '$Id: mon.pl,v 1.1 2012-10-23 19:57:32 cabo Exp $',
    'trockij@arctic.org',
    '$Name: $',
);

#
# NetBSD rc.d script compatibility: rewrite the process title to
# "mon" followed by the original arguments
#
if ($^O eq "netbsd") {
    $0 = join (" ", "mon", join (" ", @ARGV));
}
|
36 |
|
|
|
37 |
|
|
#
|
38 |
|
|
# modules in the perl distribution
|
39 |
|
|
#
|
40 |
|
|
use Getopt::Long qw(:config no_ignore_case);
|
41 |
|
|
use Text::ParseWords;
|
42 |
|
|
use POSIX;
|
43 |
|
|
use Fcntl;
|
44 |
|
|
use Socket;
|
45 |
|
|
use Sys::Hostname;
|
46 |
|
|
use Sys::Syslog qw(:DEFAULT);
|
47 |
|
|
use FileHandle;
|
48 |
|
|
|
49 |
|
|
use Data::Dumper;
|
50 |
|
|
|
51 |
|
|
#
|
52 |
|
|
# CPAN modules
|
53 |
|
|
#
|
54 |
|
|
use Time::HiRes qw(gettimeofday tv_interval usleep);
|
55 |
|
|
use Time::Period;
|
56 |
|
|
|
57 |
|
|
#
# Forward declarations. The main program below runs before any of the
# sub definitions have been seen, and calls some of them as bare words
# (e.g. "collect_output;"), which is only legal under "use strict" once
# the name has been declared.
#
# handle_sigterm and load_view_users are used by the startup code as
# well, so they are declared here with the rest.
#
sub auth;
sub call_alert;
sub check_auth;
sub clear_timers;
sub client_accept;
sub client_close;
sub client_command;
sub client_dopending;
sub client_write_opstatus;
sub collect_output;
sub daemon;
sub debug;
sub debug_dir;
sub dep_ok;
sub dep_summary;
sub depend;
sub dhmstos;
sub die_die;
sub disen_host;
sub disen_service;
sub disen_watch;
sub do_alert;
sub do_startup_alerts;
sub err_startup;
sub esc_str;
sub gen_scriptdir_hash;
sub handle_io;
sub handle_sigterm;
sub handle_trap;
sub handle_trap_timeout;
sub host_exists;
sub host_singleton_group;
sub inRange;
sub init_cf_globals;
sub init_dtlog;
sub init_globals;
sub load_auth;
sub load_state;
sub load_view_users;
sub mysystem;
sub normalize_paths;
sub pam_conv_func;
sub proc_cleanup;
sub process_event;
sub randomize_startdelay;
sub read_cf;
sub readhistoricfile;
sub reload;
sub remove_proc;
sub reset_server;
sub reset_timer;
sub run_monitor;
sub save_state;
sub set_last_test;
sub set_op_status;
sub setup_server;
sub sock_write;
sub syslog_die;
sub un_esc_str;
sub usage;
sub write_dtlog;
|
116 |
|
|
|
117 |
|
|
#
|
118 |
|
|
# globals
|
119 |
|
|
#
|
120 |
|
|
#
# general state
#
my (
    %opt,               # cmdline arguments
    %CF,                # configuration directives
    $PWD,               # current working directory
    $HOSTNAME,          # system hostname
    $STOPPED,           # 1 = scheduler stopped, 0 = not stopped
    $STOPPED_TIME,      # time(2) scheduler was stopped, if stopped
    $SLEEPINT,          # don't touch
    %watch_disabled,    # watches disabled, indexed by watch
    %watch,             # main configuration file data structure
    %alias,             # aliases
    %groups,            # hostgroups, indexed by group
    %views,             # view lists, indexed by name
    %view_users,        # view preferences, per user
);

#
# I/O routine globals
#
my (
    %clients,           # fds of connected clients
    $numclients,        # count of connected clients
    %running,           # procs which are forked and running,
                        # indexed by group/service
    $iovec,             # used for select loop
    %runningpid,        # procs which are forked and running,
                        # indexed by PID
    $procs,             # number of outstanding procs
    %fhandles,          # input file handles of children
    %ibufs,             # buffer structure to hold data from children
    $fdset_rbits,
    $fdset_ebits,
);

#
# history globals
#
my (
    @last_alerts,       # alert history, in memory
    @last_failures,     # failure history, in memory
);

#
# misc. globals
#
my (
    $i,                 # loop iteration counter, used for debugging only
    $lasttm,            # the last time(2) the mon loop started
    $pid_file_owner,    # set when creating pid file
    $tm,                # used in main loop
);

#
# authentication structure globals
#
my (%AUTHCMDS, %NOAUTHCMDS, %AUTHTRAPS);

#
# PAM authentication globals (must not be lexically scoped)
#
use vars qw ( $PAM_username $PAM_password ) ;

#
# opstatus globals
#
my (
    %OPSTAT,            # operational statuses
    %FAILURE,
    %SUCCESS,
    %WARNING,
);

my (
    $TRAP_COLDSTART,    # trap types
    $TRAP_WARMSTART,
    $TRAP_LINKDOWN,
    $TRAP_LINKUP,
    $TRAP_AUTHFAIL,
    $TRAP_EGPNEIGHBORLOSS,
    $TRAP_ENTERPRISE,
    $TRAP_HEARTBEAT,
);

my (
    $STAT_FAIL,         # _op_status values
    $STAT_OK,
    $STAT_COLDSTART,
    $STAT_WARMSTART,
    $STAT_LINKDOWN,
    $STAT_UNKNOWN,
    $STAT_TIMEOUT,
    $STAT_UNTESTED,
    $STAT_DEPEND,
    $STAT_WARN,
);

my (
    $FL_MONITOR,        # alert type flags (tested bitwise in do_alert)
    $FL_UPALERT,
    $FL_TRAP,
    $FL_TRAPTIMEOUT,
    $FL_STARTUPALERT,
    $FL_TEST,
    $FL_REDISTRIBUTE,
    $FL_ACKALERT,
    $FL_DISABLEALERT,
);

my (
    $TRAP_PDU,
    %ALERTHASH,         # hash of pathnames for alerts
    %MONITORHASH,       # hash of pathnames for monitors
    $PROT_VERSION,
    $START_TIME,        # time(2) server started
    $TRAP_PRO_VERSION,  # trap protocol version
    $DEP_EVAL_SANDBOX,  # perl environment for dep evals
);
|
203 |
|
|
|
204 |
|
|
#
|
205 |
|
|
# argument parsing
|
206 |
|
|
#
|
207 |
|
|
#
# Parse the command line into %opt. Option names are case sensitive
# (Getopt::Long is imported with no_ignore_case), so e.g. -a and -A are
# distinct options.
#
my $getopt_result = GetOptions(\%opt,
    qw/
        A|authfile=s
        B|cfbasedir=s
        D|statedir=s
        L|logdir=s
        M|m4:s
        O|syslogfacility=s
        P|pidfile=s
        S|stopped
        a|alertdir=s
        b|basedir=s
        c|configfile=s
        d|debug+
        f|fork
        h|help
        i|sleep=i
        k|maxkeep=i
        l|loadstate:s
        m|maxprocs=i
        p|port=i
        r|randstart=s
        s|scriptdir=s
        t|trapport=i
        v|version
    /);

#
# an unparsable command line is an error: show the usage text and
# exit with a non-zero status so that callers can detect it
#
if (!$getopt_result) {
    usage();
    exit 1;
}

#
# these two things can be taken care of without
# initializing things further
#
if ($opt{"v"}) {
    print "$RCSID\n$RELEASE\n";
    exit;
}

if ($opt{"h"}) {
    usage();
    exit;
}

#
# debugging output needs Data::Dumper
#
if ($opt{"d"}) {
    eval { require Data::Dumper; 1 }
        or die "error: $@\n";
}

#
# pick the syslog transport appropriate for this platform
#
if ($^O eq "linux" || $^O =~ /^(open|free|net)bsd$/ || $^O eq "aix") {
    Sys::Syslog::setlogsock ('unix');
}
elsif ($^O eq "solaris") {
    Sys::Syslog::setlogsock ('stream');
}

#
# open syslog early so that startup errors can be reported; it is
# re-opened below once the configuration file has been read.
# NOTE(review): nothing visible above this point has populated %CF, so
# the facility passed here is presumably still undefined - confirm.
#
openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});

#
# definitions
#
die "basedir $opt{b} does not exist\n" if ($opt{"b"} && ! -d $opt{"b"});

init_globals();
init_cf_globals();

syslog_die ("config file $CF{CF} does not exist") if (! -f $CF{"CF"});

#
# read config file; read_cf returns an error string, or "" on success
#
if ((my $err = read_cf ($CF{"CF"}, 1)) ne "") {
    syslog_die ("$err");
}

#
# re-open syslog with the facility that is in effect after the
# config file has been processed
#
closelog ();
openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});

#
# cmdline args override config file
#
$CF{"ALERTDIR"}  = $opt{"a"} if ($opt{"a"});
$CF{"BASEDIR"}   = $opt{"b"} if ($opt{"b"});
$CF{"AUTHFILE"}  = $opt{"A"} if ($opt{"A"});
$CF{"LOGDIR"}    = $opt{"L"} if ($opt{"L"});
$CF{"STATEDIR"}  = $opt{"D"} if ($opt{"D"});
$CF{"SCRIPTDIR"} = $opt{"s"} if ($opt{"s"});

$CF{"PIDFILE"}   = $opt{"P"} if defined($opt{"P"});     # allow empty pidfile
$CF{"MAX_KEEP"}  = $opt{"k"} if ($opt{"k"});
$CF{"MAXPROCS"}  = $opt{"m"} if ($opt{"m"});
$CF{"SERVPORT"}  = $opt{"p"} if ($opt{"p"});
$CF{"TRAPPORT"}  = $opt{"t"} if ($opt{"t"});

$SLEEPINT = $opt{"i"} if ($opt{"i"});

#
# -r takes a time value; dhmstos yields undef when it cannot be parsed
#
if ($opt{"r"}) {
    my $randstart = dhmstos ($opt{"r"});
    syslog_die ("bad randstart value") if (!defined ($randstart));
    $CF{"RANDSTART"} = $randstart;
}

#
# -S starts the server with the scheduler stopped
#
if ($opt{"S"}) {
    $STOPPED = 1;
    $STOPPED_TIME = time;
}

#
# do some path cleanups and
# build lookup tables for alerts and monitors
#
normalize_paths();
gen_scriptdir_hash();

if ($opt{"d"}) {
    debug_dir();
}

#
# load the auth control, bind, and listen
#
load_auth (1);
load_view_users (1);

#
# init client interface
# %clients is an I/O structure, indexed by the fd of the client
# $numclients is the number of clients currently connected
# $iovec is fd_set for clients and traps
#
%clients = ();
$numclients = 0;
$iovec = '';
setup_server();

#
# fork and become a daemon
#
init_dtlog() if ($CF{"DTLOGGING"});
daemon() if ($opt{"f"});

#
# Record our pid, unless the pid file was disabled by setting it to
# the empty string. The three-argument open treats the configured
# path as a literal file name. $pid_file_owner is only set when the
# file was actually opened, which is what the END block checks before
# removing it.
#
if ($CF{"PIDFILE"} ne '') {
    if (open (my $pid_fh, '>', $CF{"PIDFILE"})) {
        $pid_file_owner = $$;
        print $pid_fh "$pid_file_owner\n";
        close ($pid_fh);
    }
    else {
        # not fatal, as before, but no longer silent
        syslog ('err', "could not write pid file $CF{PIDFILE}: $!");
    }
}

set_last_test ();

#
# randomize startup checks if asked to
#
randomize_startdelay() if ($CF{"RANDSTART"});

@last_alerts = ();
@last_failures = ();
readhistoricfile ();

$procs = 0;
$i = 0;
$lasttm = time;
$fdset_rbits = $fdset_ebits = '';
%watch_disabled = ();

$SIG{HUP}  = \&reset_server;
$SIG{INT}  = \&handle_sigterm;          # for interactive debugging
$SIG{TERM} = \&handle_sigterm;
$SIG{PIPE} = 'IGNORE';

#
# load previously saved state
#
# If -l was given an argument (all, disabled, opstatus, etc...)
# pass that to load_state, otherwise default to the old behavior of
# just loading disabled hosts/services/groups
#
if (exists $opt{"l"}) {
    load_state ($opt{"l"} ? $opt{"l"} : "disabled");
}

syslog ('info', "mon server started");

#
# startup alerts
#
do_startup_alerts();
|
407 |
|
|
|
408 |
|
|
#
|
409 |
|
|
# main monitoring loop
|
410 |
|
|
#
|
411 |
|
|
while (1) {
    my $stopped_note = $STOPPED ? " (stopped)" : "";
    debug (1, "$i$stopped_note\n");
    $i++;
    $tm = time;

    #
    # step through the watch groups, decrementing and
    # handling expired timers; nothing is scheduled while the
    # scheduler is stopped or inside the global exclude_period
    #
    if ($STOPPED) {
        # scheduler is stopped: leave all timers untouched
    }
    elsif (defined $CF{"EXCLUDE_PERIOD"}
            && $CF{"EXCLUDE_PERIOD"} ne ""
            && inPeriod (time, $CF{"EXCLUDE_PERIOD"})) {
        debug (1, "not running monitors because of global exclude_period\n");
    }
    else {
        foreach my $group (keys %watch) {
            SERVICE:
            foreach my $service (keys %{$watch{$group}}) {
                my $sref = \%{$watch{$group}->{$service}};

                # seconds since the previous pass, never less than one
                my $elapsed = $tm - $lasttm;
                if ($elapsed <= 0) {
                    $elapsed = 1;
                }

                #
                # trap timer
                #
                if ($sref->{"traptimeout"}) {
                    $sref->{"_trap_timer"} -= $elapsed;

                    my $timer_expired = $sref->{"_trap_timer"} <= 0;
                    if ($timer_expired
                            && $tm - $sref->{"_last_trap"} > $sref->{"traptimeout"}) {
                        $sref->{"_trap_timer"} = $sref->{"traptimeout"};
                        handle_trap_timeout ($group, $service);
                    }
                }

                #
                # trap duration timer: once it runs out the service
                # goes back to OK and the timer is cleared
                #
                if (defined ($sref->{"_trap_duration_timer"})) {
                    $sref->{"_trap_duration_timer"} -= $elapsed;

                    if ($sref->{"_trap_duration_timer"} <= 0) {
                        set_op_status ($group, $service, $STAT_OK);
                        undef $sref->{"_trap_duration_timer"};
                    }
                }

                #
                # polling monitor timer
                #
                my $due = $sref->{"interval"}
                    && $sref->{"_timer"} <= 0
                    && !$running{"$group/$service"};

                if (!$due) {
                    # not due yet (or still running): count down, floor at 0
                    $sref->{"_timer"} -= $elapsed;
                    $sref->{"_timer"} = 0 if ($sref->{"_timer"} < 0);
                    next SERVICE;
                }

                # due, but the process limit has been reached
                if ($CF{"MAXPROCS"} && !($procs < $CF{"MAXPROCS"})) {
                    syslog ('info', "throttled at $procs processes");
                    next SERVICE;
                }

                # due, but inside the service's own exclude_period
                if (defined $sref->{"exclude_period"}
                        && $sref->{"exclude_period"} ne ""
                        && inPeriod (time, $sref->{"exclude_period"})) {
                    debug (1, "not running $group,$service because of exclude_period\n");
                    next SERVICE;
                }

                # a dependency gates the monitor when "depend" is used
                # with dep_behavior "m", or when "monitordepend" is set
                my $monitor_dep =
                    ($sref->{"dep_behavior"} eq "m"
                        && defined $sref->{"depend"}
                        && $sref->{"depend"} ne "")
                    || (defined $sref->{"monitordepend"}
                        && $sref->{"monitordepend"} ne "");

                if ($monitor_dep && !dep_ok ($sref, 'm')) {
                    debug (1, "not running $group,$service because of depend\n");
                }
                else {
                    run_monitor ($group, $service);
                }
            }
        }
    }

    $lasttm = time;

    #
    # collect any output from subprocs
    #
    collect_output ();

    #
    # clean up after exited processes, and trigger alerts
    #
    proc_cleanup ();

    #
    # handle client, server, and trap I/O
    # this routine sleeps for $SLEEPINT if no I/O is ready
    #
    handle_io ();
}

die "not reached";
|
535 |
|
|
|
536 |
|
|
#
# On exit, remove the pid file - but only in the process that wrote
# it, and only when a pid file is configured.
#
END {
    if ($$ == $pid_file_owner && $CF{"PIDFILE"} ne '') {
        unlink $CF{"PIDFILE"};
    }
}
|
539 |
|
|
|
540 |
|
|
|
541 |
|
|
##############################################################################
|
542 |
|
|
|
543 |
|
|
#
|
544 |
|
|
# startup alerts
|
545 |
|
|
#
|
546 |
|
|
#
# Run do_alert with the startup-alert flag for every service of every
# watch group, passing an empty output string and an exit value of 0.
#
sub do_startup_alerts {
    for my $grp (keys %watch) {
        my @services = keys %{$watch{$grp}};

        for my $svc (@services) {
            do_alert ($grp, $svc, "", 0, $FL_STARTUPALERT);
        }
    }
}
|
553 |
|
|
|
554 |
|
|
|
555 |
|
|
#
|
556 |
|
|
# handle alert event, throttling the alert call if necessary
|
557 |
|
|
#
|
558 |
|
|
#
# Decide whether an event for $group/$service should produce alerts,
# and if so hand each configured alert to call_alert.
#
# Arguments:
#   $group, $service  keys into %watch identifying the service
#   $output           output text of the event; its first line is
#                     used as the summary
#   $retval           exit value, matched against "alertexitrange"
#                     and against "exit=" prefixes of alert lines
#   $flags            bitmask of $FL_* values describing the alert type
#
# The return value is not meaningful.
#
# Nothing is sent when the scheduler is stopped, the watch or the
# service is disabled, an alert dependency failed, or the failure is
# acknowledged (the last two do not apply to every alert type, see
# below). Otherwise every period of the service which matches the
# current time is checked against its own throttling settings
# (alertexitrange, numalerts, alertevery, alertafter*).
#
sub do_alert {
    my ($group, $service, $output, $retval, $flags) = @_;

    debug (1, "do_alert flags=$flags\n");

    my $sref = \%{$watch{$group}->{$service}};

    my $tmnow = time;

    if ($STOPPED) {
        syslog ("notice", "ignoring alert for $group,$service because the mon scheduler is stopped");
        return;
    }

    #
    # if redistribute is set, call it now; note that this happens
    # before any of the disable/dependency/ack checks below
    #
    if ($sref->{"redistribute"} ne '') {
        my ($fac, $args) = split (/\s+/, $sref->{"redistribute"}, 2);

        call_alert (
            group   => $group,
            service => $service,
            output  => $output,
            retval  => $retval,
            flags   => $flags | $FL_REDISTRIBUTE,

            alert   => $fac,
            args    => $args,
        );
    }

    #
    # if the alarm is disabled, ignore it
    #
    if ((exists $watch_disabled{$group} && $watch_disabled{$group} == 1)
            || (defined $sref->{"disable"} && $sref->{"disable"} == 1)) {
        syslog ("notice", "ignoring alert for $group,$service");
        return;
    }

    #
    # dependency check (not applied to startup alerts and upalerts)
    #
    if (!($flags & $FL_STARTUPALERT)
            && !($flags & $FL_UPALERT)
            && ((defined $sref->{"depend"} && $sref->{"dep_behavior"} eq "a")
                || (defined $sref->{"alertdepend"}))) {
        if (!$sref->{"_depend_status"}) {
            debug (1, "alert for $group,$service supressed because of dep fail\n");
            return;
        }
    }

    #
    # the summary is the first line of the output
    #
    my ($summary) = split ("\n", $output);
    $summary = "(NO SUMMARY)" if (!defined $summary || $summary =~ /^\s*$/m);

    #
    # Declare $prevsumm unconditionally and only make the assignment
    # conditional: the behaviour of "my ... if COND" is undefined in
    # Perl, and the variable may keep its value from an earlier call.
    #
    my $prevsumm;
    ($prevsumm) = split ("\n", $sref->{"_failure_output"})
        if (defined $sref->{"_failure_output"});
    $prevsumm = "(NO SUMMARY)" if (!defined $prevsumm || $prevsumm =~ /^\s*$/m);

    # the summaries are compared with all whitespace removed
    my $strippedsummary = $summary;
    $strippedsummary =~ s/\s//mg;
    my $strippedprevious = $prevsumm;
    $strippedprevious =~ s/\s//mg;

    #
    # If the summary changed, un-acknowledge the service if
    # 'unack_summary' is set, and restart the per-period throttling
    #
    if ($sref->{'_ack'} != 0
            && $sref->{'unack_summary'} == 1
            && $strippedsummary ne $strippedprevious
            && !($flags & ($FL_UPALERT|$FL_ACKALERT|$FL_DISABLEALERT))) {
        # NOTE(review): this writes to STDERR directly rather than going
        # through debug() or syslog() like the rest of the routine -
        # looks like leftover debugging output, confirm before removing
        print STDERR "Unacking $group/$service:\nSummary: X".$strippedsummary."X\nPrevious: X".$strippedprevious."X\n";
        $sref->{"_ack"} = 0;
        $sref->{"_ack_comment"} = "";
        $sref->{"_consec_failures"} = 1;

        foreach my $period (keys %{$sref->{"periods"}}) {
            $sref->{"periods"}->{$period}->{"_last_alert"} = 0;
#           $sref->{"periods"}->{$period}->{"_alert_sent"} = 0;
            $sref->{"periods"}->{$period}->{"_1stfailtime"} = 0;
            $sref->{"periods"}->{$period}->{"_failcount"} = 0;
        }
    }

    #
    # no alerts for ack'd failures, except for upalerts, ackalerts
    # and disablealerts
    #
    if ($sref->{"_ack"} != 0
            && !($flags & ($FL_UPALERT|$FL_ACKALERT|$FL_DISABLEALERT))) {
        syslog ("debug", "no alert for $group.$service" .
            " because of ack'd failure");
        return;
    }

    #
    # check each time period for pending alerts
    #
    foreach my $periodlabel (keys %{$sref->{"periods"}}) {
        #
        # only send alerts that are in the proper period
        #
        next if (!inPeriod ($tmnow, $sref->{"periods"}->{$periodlabel}->{"period"}));

        my $pref = \%{$sref->{"periods"}->{$periodlabel}};

        #
        # skip upalerts/ackalerts not paired with down alerts
        # disable by setting "no_comp_alerts" in period section
        #
        if (!$pref->{"no_comp_alerts"}
                && ($flags & ($FL_UPALERT | $FL_ACKALERT))
                && !$pref->{"_alert_sent"}) {
            syslog ('debug', "$group/$service/$periodlabel: Suppressing upalert since no down alert was sent.") if ($flags & $FL_UPALERT);
            syslog ('debug', "$group/$service/$periodlabel: Suppressing ackalert since no down alert was sent.") if ($flags & $FL_ACKALERT);
            next;
        }

        #
        # skip looping upalerts when "no_comp_alerts" set.
        #
        if ($pref->{"no_comp_alerts"}
                && ($flags & $FL_UPALERT)
                && ($pref->{"_no_comp_alerts_upalert_sent"} > 0)) {
            next;
        }

        #
        # do this if we're not handling an upalert, startupalert,
        # ackalert, or disablealert
        #
        if (!($flags & $FL_UPALERT)
                && !($flags & $FL_STARTUPALERT)
                && !($flags & $FL_DISABLEALERT)
                && !($flags & $FL_ACKALERT)) {
            #
            # alert only when exit code matches
            #
            if (exists $pref->{"alertexitrange"}) {
                next if (!inRange ($retval, $pref->{"alertexitrange"}));
            }

            #
            # alert only numalerts
            #
            if ($pref->{"numalerts"}
                    && $pref->{"_alert_sent"} >= $pref->{"numalerts"}) {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert since numalerts is met.");
                next;
            }

            #
            # only alert once every "alertevery" seconds, unless
            # output from monitor is different or if strict alertevery
            #
            # strict and _ignore_summary are basically the same though
            # strict short-circuits and overrides other settings and exists
            # for compatibility with pre-1.1 configs
            #
            if ($pref->{"alertevery"} != 0 &&                                   # if alertevery is set and
                    ($tmnow - $pref->{"_last_alert"} < $pref->{"alertevery"}) && # we're within the time period and one of these:
                    (($pref->{"_alertevery_strict"}) ||                         # [ strict is set or
                     ($pref->{"_observe_detail"} && $sref->{"_failure_output"} eq $output) ||   # observing detail and output hasn't changed or
                     (!$pref->{"_observe_detail"} && (!$pref->{"_ignore_summary"}) && ($prevsumm eq $summary)) ||  # not observing detail
                                                                                # and not ignoring summary and summ hasn't changed or
                     ($pref->{"_ignore_summary"}))) {                           # we're ignoring summary changes ]
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertevery.");
                next;
            }

            #
            # alertafter NUM
            #
            if (defined $pref->{"alertafter_consec"}
                    && ($sref->{"_consec_failures"} < $pref->{"alertafter_consec"})) {
                syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter consecutive failures.");
                next;
            }

            #
            # alertafter timeval
            #
            elsif ((!defined ($pref->{"alertafter"}))
                    && (defined ($pref->{"alertafterival"}))) {
                # the interval is measured from the first failure
                $pref->{'_1stfailtime'} = $tmnow if $pref->{'_1stfailtime'} == 0;

                if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'}) {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter numval.");
                    next;
                }
            }

            #
            # alertafter NUM timeval
            #
            elsif (defined ($pref->{"alertafter"})) {
                $pref->{"_failcount"}++;

                if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'}
                        && $pref->{"_failcount"} < $pref->{"alertafter"}) {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter num timeval.");
                    next;
                }

                #
                # start a new time interval
                #
                if ($tmnow - $pref->{'_1stfailtime'} > $pref->{'alertafterival'}) {
                    $pref->{"_failcount"} = 1;
                }

                if ($pref->{"_failcount"} == 1) {
                    $pref->{"_1stfailtime"} = $tmnow;
                }

                if ($pref->{"_failcount"} < $pref->{"alertafter"}) {
                    syslog ('debug', "$group/$service/$periodlabel: Suppressing alert for now due to alertafter num timeval.");
                    next;
                }
            }
        }

        #
        # at this point, no alerts are blocked,
        # so send the alerts
        #

        #
        # trigger multiple alerts in this period; which list is used
        # depends on the alert type
        #
        my @alerts;

        if ($flags & $FL_UPALERT) {
            @alerts = @{$pref->{"upalerts"}};
        }
        elsif ($flags & $FL_STARTUPALERT) {
            @alerts = @{$pref->{"startupalerts"}};
        }
        elsif ($flags & $FL_DISABLEALERT) {
            @alerts = @{$pref->{"disablealerts"}};
        }
        elsif ($flags & $FL_ACKALERT) {
            @alerts = @{$pref->{"ackalerts"}};
        }
        else {
            @alerts = @{$pref->{"alerts"}};
        }

        my $called = 0;

        foreach my $alert_spec (@alerts) {
            my ($fac, $args);

            #
            # an alert may start with "exit=N" or "exit=N-M", in which
            # case it only applies when the exit value is in that range
            #
            if ($alert_spec =~ /^exit\s*=\s*((\d+|\d+-\d+))\s/i) {
                my $range = $1;
                next if (!inRange ($retval, $range));
                ($fac, $args) = (split (/\s+/, $alert_spec, 3))[1,2];
            }
            else {
                ($fac, $args) = split (/\s+/, $alert_spec, 2);
            }

            $called++ if (call_alert (
                    group   => $group,
                    service => $service,
                    output  => $output,
                    retval  => $retval,
                    flags   => $flags,

                    pref    => $pref,
                    alert   => $fac,
                    args    => $args,
                )
            );
        }

        #
        # reset _alert_sent if up alert was sent from a trap
        #
        if ($called) {
            # NOTE(review): "$FL_TRAP | $flags" is true whenever
            # $FL_TRAP is non-zero, so this branch is effectively taken
            # for every upalert, not only for those coming from a trap.
            # "&" may have been intended - left unchanged, confirm.
            if ((($FL_TRAP | $flags) && ($FL_UPALERT & $flags))) {
                $pref->{"_alert_sent"} = 0;
                $pref->{"_last_alert"} = 0;
            }
            else {
                $pref->{"_alert_sent"}++;

                #
                # reset _no_comp_alerts_upalert_sent counter - when service will be
                # back up, upalert will be sent.
                #
                if ($pref->{"no_comp_alerts"}) {
                    $pref->{"_no_comp_alerts_upalert_sent"} = 0;
                }
            }

            if ($pref->{"no_comp_alerts"} && ($flags & $FL_UPALERT)) {
                $pref->{"_no_comp_alerts_upalert_sent"}++;
            }
        }
    }
}
|
875 |
|
|
|
876 |
|
|
|
877 |
|
|
|
878 |
|
|
#
|
879 |
|
|
# walk through the watch list and reset the time
|
880 |
|
|
# the service was last called
|
881 |
|
|
#
|
882 |
|
|
#
# Walk every service of every watch group and load its "_timer" with
# the service's configured "interval".
#
# Takes no arguments; the return value is not meaningful.
#
sub set_last_test {
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            $watch{$group}->{$service}->{"_timer"} =
                $watch{$group}->{$service}->{"interval"};
        }
    }

    return;
}
|
894 |
|
|
|
895 |
|
|
|
896 |
|
|
#
|
897 |
|
|
# parse configuration file
|
898 |
|
|
#
|
899 |
|
|
# build the following data structures:
|
900 |
|
|
#
|
901 |
|
|
# %groups
|
902 |
|
|
# each element of %groups is an array of hostnames
|
903 |
|
|
# group records are terminated by a blank line in the
|
904 |
|
|
# configuration file
|
905 |
|
|
# %watch{"group"}->{"service"}->{"variable"} = value
|
906 |
|
|
# %alias
|
907 |
|
|
#
|
908 |
|
|
sub read_cf {
|
909 |
|
|
my ($CF, $commit) = @_;
|
910 |
|
|
my ($var, $watchgroup, $ingroup, $curgroup, $inwatch,
|
911 |
|
|
$args, $hosts, %disabled, $h, $i,
|
912 |
|
|
$inalias, $curalias, $inview, $curview);
|
913 |
|
|
my ($sref, $pref);
|
914 |
|
|
my ($service, $period);
|
915 |
|
|
my ($authtype, @authtypes);
|
916 |
|
|
my $line_num = 0;
|
917 |
|
|
|
918 |
|
|
#
|
919 |
|
|
# parse configuration file
|
920 |
|
|
#
|
921 |
|
|
if (exists($opt{"M"}) || $CF =~ /\.m4$/)
|
922 |
|
|
{
|
923 |
|
|
my $m4 = "m4";
|
924 |
|
|
$m4 = $opt{"M"} if (defined($opt{"M"}));
|
925 |
|
|
return "could not open m4 pipe of cf file: $CF: $!"
|
926 |
|
|
if (!open (CFG, "$m4 $CF |"));
|
927 |
|
|
}
|
928 |
|
|
|
929 |
|
|
else
|
930 |
|
|
{
|
931 |
|
|
return "could not open cf file: $CF: $!"
|
932 |
|
|
if (!open (CFG, $CF));
|
933 |
|
|
}
|
934 |
|
|
|
935 |
|
|
#
|
936 |
|
|
# buffers to hold the new un-committed config
|
937 |
|
|
#
|
938 |
|
|
my %new_alias = ();
|
939 |
|
|
my %new_views = ();
|
940 |
|
|
my %new_CF = %CF;
|
941 |
|
|
my %new_groups;
|
942 |
|
|
my %new_watch;
|
943 |
|
|
|
944 |
|
|
my %is_watch;
|
945 |
|
|
|
946 |
|
|
my $servnum = 0;
|
947 |
|
|
|
948 |
|
|
my $DEP_BEHAVIOR = "a";
|
949 |
|
|
my $DEP_MEMORY = 0;
|
950 |
|
|
my $UNACK_SUMMARY = 0;
|
951 |
|
|
|
952 |
|
|
my $incomplete_line = 0;
|
953 |
|
|
my $linepart = "";
|
954 |
|
|
my $l = "";
|
955 |
|
|
my $acc_line = "";
|
956 |
|
|
|
957 |
|
|
for (;;)
|
958 |
|
|
{
|
959 |
|
|
#
|
960 |
|
|
# read in a logical "line", which may span actual lines
|
961 |
|
|
#
|
962 |
|
|
do
|
963 |
|
|
{
|
964 |
|
|
$line_num++;
|
965 |
|
|
last if (!defined ($linepart = <CFG>));
|
966 |
|
|
next if $linepart =~ /^\s*#/;
|
967 |
|
|
|
968 |
|
|
#
|
969 |
|
|
# accumulate multi-line lines (ones which are \-escaped)
|
970 |
|
|
#
|
971 |
|
|
if ($incomplete_line) { $linepart =~ s/^\s*//; }
|
972 |
|
|
|
973 |
|
|
if ($linepart =~ /^(.*)\\\s*$/)
|
974 |
|
|
{
|
975 |
|
|
$incomplete_line = 1;
|
976 |
|
|
$acc_line .= $1;
|
977 |
|
|
chomp $acc_line;
|
978 |
|
|
next;
|
979 |
|
|
}
|
980 |
|
|
|
981 |
|
|
else
|
982 |
|
|
{
|
983 |
|
|
$acc_line .= $linepart;
|
984 |
|
|
}
|
985 |
|
|
|
986 |
|
|
$l = $acc_line;
|
987 |
|
|
$acc_line = "";
|
988 |
|
|
|
989 |
|
|
chomp $l;
|
990 |
|
|
$l =~ s/^\s*//;
|
991 |
|
|
$l =~ s/\s*$//;
|
992 |
|
|
|
993 |
|
|
$incomplete_line = 0;
|
994 |
|
|
$linepart = "";
|
995 |
|
|
};
|
996 |
|
|
|
997 |
|
|
#
|
998 |
|
|
# global variables which can be overriden by the command line
|
999 |
|
|
#
|
1000 |
|
|
if (!$inwatch && $l =~ /^(\w+) \s* = \s* (.*) \s*$/ix)
|
1001 |
|
|
{
|
1002 |
|
|
if ($1 eq "alertdir") {
|
1003 |
|
|
$new_CF{"ALERTDIR"} = $2;
|
1004 |
|
|
|
1005 |
|
|
} elsif ($1 eq "basedir") {
|
1006 |
|
|
$new_CF{"BASEDIR"} = $2;
|
1007 |
|
|
$new_CF{"BASEDIR"} = "$PWD/$new_CF{BASEDIR}" if ($new_CF{"BASEDIR"} !~ m{^/});
|
1008 |
|
|
$new_CF{"BASEDIR"} =~ s{/$}{};
|
1009 |
|
|
|
1010 |
|
|
} elsif ($1 eq "cfbasedir") {
|
1011 |
|
|
$new_CF{"CFBASEDIR"} = $2;
|
1012 |
|
|
$new_CF{"CFBASEDIR"} = "$PWD/$new_CF{CFBASEDIR}" if ($new_CF{"CFBASEDIR"} !~ m{^/});
|
1013 |
|
|
$new_CF{"CFBASEDIR"} =~ s{/$}{};
|
1014 |
|
|
|
1015 |
|
|
} elsif ($1 eq "mondir") {
|
1016 |
|
|
$new_CF{"SCRIPTDIR"} = $2;
|
1017 |
|
|
|
1018 |
|
|
} elsif ($1 eq "logdir") {
|
1019 |
|
|
$new_CF{"LOGDIR"} = $2;
|
1020 |
|
|
|
1021 |
|
|
} elsif ($1 eq "histlength") {
|
1022 |
|
|
$new_CF{"MAX_KEEP"} = $2;
|
1023 |
|
|
|
1024 |
|
|
} elsif ($1 eq "serverport") {
|
1025 |
|
|
$new_CF{"SERVPORT"} = $2;
|
1026 |
|
|
|
1027 |
|
|
} elsif ($1 eq "trapport") {
|
1028 |
|
|
$new_CF{"TRAPPORT"} = $2;
|
1029 |
|
|
|
1030 |
|
|
} elsif ($1 eq "serverbind") {
|
1031 |
|
|
$new_CF{"SERVERBIND"} = $2;
|
1032 |
|
|
|
1033 |
|
|
} elsif ($1 eq "clientallow") {
|
1034 |
|
|
$new_CF{"CLIENTALLOW"}= $2;
|
1035 |
|
|
|
1036 |
|
|
} elsif ($1 eq "trapbind") {
|
1037 |
|
|
$new_CF{"TRAPBIND"} = $2;
|
1038 |
|
|
|
1039 |
|
|
} elsif ($1 eq "pidfile") {
|
1040 |
|
|
$new_CF{"PIDFILE"} = $2;
|
1041 |
|
|
|
1042 |
|
|
} elsif ($1 eq "randstart") {
|
1043 |
|
|
$new_CF{"RANDSTART"} = dhmstos($2);
|
1044 |
|
|
if (!defined ($new_CF{"RANDSTART"})) {
|
1045 |
|
|
close (CFG);
|
1046 |
|
|
return "cf error: bad value '$2' for randstart option (syntax: randstart = timeval), line $line_num";
|
1047 |
|
|
}
|
1048 |
|
|
|
1049 |
|
|
} elsif ($1 eq "maxprocs") {
|
1050 |
|
|
$new_CF{"MAXPROCS"} = $2;
|
1051 |
|
|
|
1052 |
|
|
} elsif ($1 eq "statedir") {
|
1053 |
|
|
$new_CF{"STATEDIR"} = $2;
|
1054 |
|
|
|
1055 |
|
|
} elsif ($1 eq "authfile") {
|
1056 |
|
|
$new_CF{"AUTHFILE"} = $2;
|
1057 |
|
|
if (! -r $new_CF{"AUTHFILE"}) {
|
1058 |
|
|
close (CFG);
|
1059 |
|
|
return "cf error: authfile '$2' does not exist or is not readable, line $line_num";
|
1060 |
|
|
}
|
1061 |
|
|
|
1062 |
|
|
} elsif ($1 eq "authtype") {
|
1063 |
|
|
$new_CF{"AUTHTYPE"} = $2;
|
1064 |
|
|
@authtypes = split(' ' , $new_CF{"AUTHTYPE"}) ;
|
1065 |
|
|
foreach $authtype (@authtypes) {
|
1066 |
|
|
if ($authtype eq "pam") {
|
1067 |
|
|
eval 'use Authen::PAM qw(:constants);' ;
|
1068 |
|
|
if ($@ ne "") {
|
1069 |
|
|
close (CFG);
|
1070 |
|
|
return "cf error: could not use PAM authentication: $@";
|
1071 |
|
|
}
|
1072 |
|
|
}
|
1073 |
|
|
}
|
1074 |
|
|
|
1075 |
|
|
} elsif ($1 eq "pamservice") {
|
1076 |
|
|
$new_CF{"PAMSERVICE"} = $2;
|
1077 |
|
|
|
1078 |
|
|
} elsif ($1 eq "userfile") {
|
1079 |
|
|
$new_CF{"USERFILE"} = $2;
|
1080 |
|
|
if (! -r $new_CF{"USERFILE"}) {
|
1081 |
|
|
close (CFG);
|
1082 |
|
|
return "cf error: userfile '$2' does not exist or is not readable, line $line_num";
|
1083 |
|
|
}
|
1084 |
|
|
|
1085 |
|
|
} elsif ($1 eq "historicfile") {
|
1086 |
|
|
$new_CF{"HISTORICFILE"} = $2;
|
1087 |
|
|
|
1088 |
|
|
} elsif ($1 eq "historictime") {
|
1089 |
|
|
$new_CF{"HISTORICTIME"} = dhmstos($2);
|
1090 |
|
|
if (!defined $new_CF{"HISTORICTIME"}) {
|
1091 |
|
|
close (CFG);
|
1092 |
|
|
return "cf error: bad value '$2' for historictime command (syntax: historictime = timeval), line $line_num";
|
1093 |
|
|
}
|
1094 |
|
|
|
1095 |
|
|
} elsif ($1 eq "cltimeout") {
|
1096 |
|
|
$new_CF{"CLIENT_TIMEOUT"} = dhmstos($2);
|
1097 |
|
|
if (!defined ($new_CF{"CLIENT_TIMEOUT"})) {
|
1098 |
|
|
close (CFG);
|
1099 |
|
|
return "cf error: bad value '$2' for cltimeout command (syntax: cltimeout = secs), line $line_num";
|
1100 |
|
|
}
|
1101 |
|
|
|
1102 |
|
|
} elsif ($1 eq "monerrfile") {
|
1103 |
|
|
$new_CF{"MONERRFILE"} = $2;
|
1104 |
|
|
|
1105 |
|
|
} elsif ($1 eq "dtlogfile") {
|
1106 |
|
|
$new_CF{"DTLOGFILE"} = $2;
|
1107 |
|
|
|
1108 |
|
|
} elsif ($1 eq "dtlogging") {
|
1109 |
|
|
$new_CF{"DTLOGGING"} = 0;
|
1110 |
|
|
if ($2 == 1 || $2 eq "yes" || $2 eq "true") {
|
1111 |
|
|
$new_CF{"DTLOGGING"} = 1;
|
1112 |
|
|
}
|
1113 |
|
|
|
1114 |
|
|
} elsif ($1 eq "dep_recur_limit") {
|
1115 |
|
|
$new_CF{"DEP_RECUR_LIMIT"} = $2;
|
1116 |
|
|
|
1117 |
|
|
} elsif ($1 eq "dep_behavior") {
|
1118 |
|
|
if ($2 ne "m" && $2 ne "a" && $2 ne "hm") {
|
1119 |
|
|
close (CFG);
|
1120 |
|
|
return "cf error: unknown dependency behavior '$2', line $line_num";
|
1121 |
|
|
}
|
1122 |
|
|
$DEP_BEHAVIOR = $2;
|
1123 |
|
|
|
1124 |
|
|
} elsif ($1 eq "dep_memory") {
|
1125 |
|
|
my $memory = dhmstos($2);
|
1126 |
|
|
if (!defined $memory) {
|
1127 |
|
|
close (CFG);
|
1128 |
|
|
return "cf error: bad value '$2' for dep_memory option (syntax: dep_memory = timeval), line $line_num";
|
1129 |
|
|
}
|
1130 |
|
|
$DEP_MEMORY = $memory;
|
1131 |
|
|
|
1132 |
|
|
} elsif ($1 eq "unack_summary") {
|
1133 |
|
|
if (defined $2) {
|
1134 |
|
|
if ($2 =~ /y(es)?/i) {
|
1135 |
|
|
$UNACK_SUMMARY = 1;
|
1136 |
|
|
} elsif ($2 =~ /n(o)?/i) {
|
1137 |
|
|
$UNACK_SUMMARY = 0;
|
1138 |
|
|
} elsif ($2 eq "0" || $2 eq "1") {
|
1139 |
|
|
$UNACK_SUMMARY = $2;
|
1140 |
|
|
} else {
|
1141 |
|
|
return "cf error: invalid unack_summary value '$2' (syntax: unack_summary [0|1|y|yes|n|no])";
|
1142 |
|
|
}
|
1143 |
|
|
} else {
|
1144 |
|
|
$UNACK_SUMMARY = 1;
|
1145 |
|
|
}
|
1146 |
|
|
|
1147 |
|
|
} elsif ($1 eq "syslog_facility") {
|
1148 |
|
|
$new_CF{"SYSLOG_FACILITY"} = $2;
|
1149 |
|
|
|
1150 |
|
|
} elsif ($1 eq "startupalerts_on_reset") {
|
1151 |
|
|
if ($2 =~ /^1|yes|true|on$/i) {
|
1152 |
|
|
$new_CF{"STARTUPALERTS_ON_RESET"} = 1;
|
1153 |
|
|
} else {
|
1154 |
|
|
$new_CF{"STARTUPALERTS_ON_RESET"} = 0;
|
1155 |
|
|
}
|
1156 |
|
|
|
1157 |
|
|
} elsif ($1 eq "monremote") {
|
1158 |
|
|
$new_CF{"MONREMOTE"} = $2;
|
1159 |
|
|
|
1160 |
|
|
} elsif ($1 eq "exclude_period") {
|
1161 |
|
|
if (inPeriod (time, $2) == -1)
|
1162 |
|
|
{
|
1163 |
|
|
close (CFG);
|
1164 |
|
|
return "cf error: malformed exclude_period '$2' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
|
1165 |
|
|
}
|
1166 |
|
|
$new_CF{"EXCLUDE_PERIOD"} = $2;
|
1167 |
|
|
} else {
|
1168 |
|
|
close (CFG);
|
1169 |
|
|
return "cf error: unknown variable '$1', line $line_num";
|
1170 |
|
|
}
|
1171 |
|
|
|
1172 |
|
|
next;
|
1173 |
|
|
}
|
1174 |
|
|
|
1175 |
|
|
#
|
1176 |
|
|
# end of record
|
1177 |
|
|
#
|
1178 |
|
|
if ($l eq "")
|
1179 |
|
|
{
|
1180 |
|
|
$ingroup = 0;
|
1181 |
|
|
$inalias = 0;
|
1182 |
|
|
$inwatch = 0;
|
1183 |
|
|
$period = 0;
|
1184 |
|
|
$inview = 0;
|
1185 |
|
|
|
1186 |
|
|
$curgroup = "";
|
1187 |
|
|
$curalias = "";
|
1188 |
|
|
$watchgroup = "";
|
1189 |
|
|
|
1190 |
|
|
$servnum = 0;
|
1191 |
|
|
next;
|
1192 |
|
|
}
|
1193 |
|
|
|
1194 |
|
|
#
|
1195 |
|
|
# hostgroup record
|
1196 |
|
|
#
|
1197 |
|
|
if ($l =~ /^hostgroup\s+([a-zA-Z0-9_.-]+)\s*(.*)/)
|
1198 |
|
|
{
|
1199 |
|
|
$curgroup = $1;
|
1200 |
|
|
|
1201 |
|
|
$ingroup = 1;
|
1202 |
|
|
$inview = 0;
|
1203 |
|
|
$inalias = 0;
|
1204 |
|
|
$inwatch = 0;
|
1205 |
|
|
$period = 0;
|
1206 |
|
|
|
1207 |
|
|
|
1208 |
|
|
$hosts = $2;
|
1209 |
|
|
%disabled = ();
|
1210 |
|
|
|
1211 |
|
|
foreach $h (grep (/^\*/, @{$groups{$curgroup}}))
|
1212 |
|
|
{
|
1213 |
|
|
# We have to make $i = $h because $h is actually
|
1214 |
|
|
# a pointer to %groups and will modify it.
|
1215 |
|
|
$i = $h;
|
1216 |
|
|
$i =~ s/^\*//;
|
1217 |
|
|
$disabled{$i} = 1;
|
1218 |
|
|
}
|
1219 |
|
|
|
1220 |
|
|
@{$new_groups{$curgroup}} = split(/\s+/, $hosts);
|
1221 |
|
|
|
1222 |
|
|
#
|
1223 |
|
|
# keep hosts which were previously disabled
|
1224 |
|
|
#
|
1225 |
|
|
for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
|
1226 |
|
|
{
|
1227 |
|
|
$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
|
1228 |
|
|
if ($disabled{$new_groups{$curgroup}[$i]});
|
1229 |
|
|
}
|
1230 |
|
|
|
1231 |
|
|
next;
|
1232 |
|
|
}
|
1233 |
|
|
|
1234 |
|
|
if ($ingroup)
|
1235 |
|
|
{
|
1236 |
|
|
push (@{$new_groups{$curgroup}}, split(/\s+/, $l));
|
1237 |
|
|
|
1238 |
|
|
for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
|
1239 |
|
|
{
|
1240 |
|
|
$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
|
1241 |
|
|
if ($disabled{$new_groups{$curgroup}[$i]});
|
1242 |
|
|
}
|
1243 |
|
|
|
1244 |
|
|
next;
|
1245 |
|
|
}
|
1246 |
|
|
|
1247 |
|
|
#
|
1248 |
|
|
# alias record
|
1249 |
|
|
#
|
1250 |
|
|
if ($l =~ /^alias\s+([a-zA-Z0-9_.-]+)\s*$/)
|
1251 |
|
|
{
|
1252 |
|
|
$inalias = 1;
|
1253 |
|
|
$inview = 0;
|
1254 |
|
|
$ingroup = 0;
|
1255 |
|
|
$inwatch = 0;
|
1256 |
|
|
$period = 0;
|
1257 |
|
|
|
1258 |
|
|
$curalias = $1;
|
1259 |
|
|
next;
|
1260 |
|
|
}
|
1261 |
|
|
|
1262 |
|
|
if ($inalias)
|
1263 |
|
|
{
|
1264 |
|
|
if ($l =~ /\A(.*)\Z/)
|
1265 |
|
|
{
|
1266 |
|
|
push (@{$new_alias{$curalias}}, $1);
|
1267 |
|
|
next;
|
1268 |
|
|
}
|
1269 |
|
|
}
|
1270 |
|
|
|
1271 |
|
|
#
|
1272 |
|
|
# view record
|
1273 |
|
|
#
|
1274 |
|
|
if ($l =~ /^view\s+([a-zA-Z0-9_.-]+)\s+(.*)$/)
|
1275 |
|
|
{
|
1276 |
|
|
$inview = 1;
|
1277 |
|
|
$inalias = 0;
|
1278 |
|
|
$ingroup = 0;
|
1279 |
|
|
$inwatch = 0;
|
1280 |
|
|
$period = 0;
|
1281 |
|
|
|
1282 |
|
|
$curview = $1;
|
1283 |
|
|
$new_views{$curview}={};
|
1284 |
|
|
|
1285 |
|
|
foreach (split(/\s+/, $2)) {
|
1286 |
|
|
$new_views{$curview}->{$_} = 1;
|
1287 |
|
|
};
|
1288 |
|
|
next;
|
1289 |
|
|
}
|
1290 |
|
|
|
1291 |
|
|
if ($inview)
|
1292 |
|
|
{
|
1293 |
|
|
foreach (split(/\s+/, $l)) {
|
1294 |
|
|
$new_views{$curview}->{$_} = 1;
|
1295 |
|
|
};
|
1296 |
|
|
next;
|
1297 |
|
|
}
|
1298 |
|
|
|
1299 |
|
|
#
|
1300 |
|
|
# watch record
|
1301 |
|
|
#
|
1302 |
|
|
if ($l =~ /^watch\s+([a-zA-Z0-9_.-]+)\s*/)
|
1303 |
|
|
{
|
1304 |
|
|
$watchgroup = $1;
|
1305 |
|
|
$inwatch = 1;
|
1306 |
|
|
$inview = 0;
|
1307 |
|
|
$inalias = 0;
|
1308 |
|
|
$ingroup = 0;
|
1309 |
|
|
$period = 0;
|
1310 |
|
|
|
1311 |
|
|
if (!defined ($new_groups{$watchgroup}))
|
1312 |
|
|
{
|
1313 |
|
|
#
|
1314 |
|
|
# This hostgroup doesn't exist yet, we'll create it and warn
|
1315 |
|
|
#
|
1316 |
|
|
@{$new_groups{$watchgroup}} = ($watchgroup);
|
1317 |
|
|
print STDERR "Warning: watch group $watchgroup defined with no corresponding hostgroup.\n";
|
1318 |
|
|
}
|
1319 |
|
|
if ($new_watch{$watchgroup})
|
1320 |
|
|
{
|
1321 |
|
|
close (CFG);
|
1322 |
|
|
return "cf error: watch '$watchgroup' already defined, line $line_num";
|
1323 |
|
|
}
|
1324 |
|
|
|
1325 |
|
|
$curgroup = "";
|
1326 |
|
|
$service = "";
|
1327 |
|
|
|
1328 |
|
|
next;
|
1329 |
|
|
}
|
1330 |
|
|
|
1331 |
|
|
if ($inwatch)
|
1332 |
|
|
{
|
1333 |
|
|
#
|
1334 |
|
|
# env variables
|
1335 |
|
|
#
|
1336 |
|
|
if ($l =~ /^([A-Z_][A-Z0-9_]*)=(.*)/)
|
1337 |
|
|
{
|
1338 |
|
|
if ($service eq "") {
|
1339 |
|
|
close (CFG);
|
1340 |
|
|
return "cf error: environment variable defined without a service, line $line_num";
|
1341 |
|
|
}
|
1342 |
|
|
$new_watch{$watchgroup}->{$service}->{"ENV"}->{$1} = $2;
|
1343 |
|
|
|
1344 |
|
|
next;
|
1345 |
|
|
}
|
1346 |
|
|
|
1347 |
|
|
#
|
1348 |
|
|
# non-env variables
|
1349 |
|
|
#
|
1350 |
|
|
else
|
1351 |
|
|
{
|
1352 |
|
|
$l =~ /^(\w+)\s*(.*)$/;
|
1353 |
|
|
$var = $1;
|
1354 |
|
|
$args = $2;
|
1355 |
|
|
}
|
1356 |
|
|
|
1357 |
|
|
#
|
1358 |
|
|
# service entry
|
1359 |
|
|
#
|
1360 |
|
|
if ($var eq "service")
|
1361 |
|
|
{
|
1362 |
|
|
$service = $args;
|
1363 |
|
|
|
1364 |
|
|
if ($service !~ /^[a-zA-Z0-9_.-]+$/) {
|
1365 |
|
|
close (CFG);
|
1366 |
|
|
return "cf error: invalid service tag '$args', line $line_num";
|
1367 |
|
|
}
|
1368 |
|
|
|
1369 |
|
|
elsif (exists $new_watch{$watchgroup}->{$service})
|
1370 |
|
|
{
|
1371 |
|
|
close (CFG);
|
1372 |
|
|
return "cf error: service $service already defined for watch group $watchgroup, line $line_num";
|
1373 |
|
|
}
|
1374 |
|
|
|
1375 |
|
|
$period = 0;
|
1376 |
|
|
$sref = \%{$new_watch{$watchgroup}->{$service}};
|
1377 |
|
|
$sref->{"service"} = $args;
|
1378 |
|
|
$sref->{"interval"} = undef;
|
1379 |
|
|
$sref->{"randskew"} = 0;
|
1380 |
|
|
$sref->{"redistribute"} = "";
|
1381 |
|
|
$sref->{"dep_behavior"} = $DEP_BEHAVIOR;
|
1382 |
|
|
$sref->{"dep_memory"} = $DEP_MEMORY;
|
1383 |
|
|
$sref->{"exclude_period"} = "";
|
1384 |
|
|
$sref->{"exclude_hosts"} = {};
|
1385 |
|
|
$sref->{"_op_status"} = $STAT_UNTESTED;
|
1386 |
|
|
$sref->{"_last_op_status"} = $STAT_UNTESTED;
|
1387 |
|
|
$sref->{"_ack"} = 0;
|
1388 |
|
|
$sref->{"_ack_comment"} = '';
|
1389 |
|
|
$sref->{"unack_summary"} = $UNACK_SUMMARY;
|
1390 |
|
|
$sref->{"_consec_failures"} = 0;
|
1391 |
|
|
$sref->{"_failure_count"} = 0 if (!defined($sref->{"_failure_count"}));
|
1392 |
|
|
$sref->{"_start_of_monitor"} = time if (!defined($sref->{"_start_of_monitor"}));
|
1393 |
|
|
$sref->{"_alert_count"} = 0 if (!defined($sref->{"_alert_count"}));
|
1394 |
|
|
$sref->{"_last_failure"} = 0 if (!defined($sref->{"_last_failure"}));
|
1395 |
|
|
$sref->{"_last_success"} = 0 if (!defined($sref->{"_last_success"}));
|
1396 |
|
|
$sref->{"_last_trap"} = 0 if (!defined($sref->{"_last_trap"}));
|
1397 |
|
|
$sref->{"_last_traphost"} = '' if (!defined($sref->{"_last_traphost"}));
|
1398 |
|
|
$sref->{"_exitval"} = "undef" if (!defined($sref->{"_exitval"}));
|
1399 |
|
|
$sref->{"_last_check"} = undef;
|
1400 |
|
|
#
|
1401 |
|
|
# -1 for _monitor_duration means no monitor has been run yet
|
1402 |
|
|
# so there is no duration data available
|
1403 |
|
|
#
|
1404 |
|
|
$sref->{"_monitor_duration"} = -1;
|
1405 |
|
|
$sref->{"_monitor_running"} = 0;
|
1406 |
|
|
$sref->{"_depend_status"} = undef;
|
1407 |
|
|
$sref->{"failure_interval"} = undef;
|
1408 |
|
|
$sref->{"_old_interval"} = undef;
|
1409 |
|
|
next;
|
1410 |
|
|
}
|
1411 |
|
|
|
1412 |
|
|
if ($service eq "")
|
1413 |
|
|
{
|
1414 |
|
|
close (CFG);
|
1415 |
|
|
return "cf error: need to specify service in watch record, line $line_num";
|
1416 |
|
|
}
|
1417 |
|
|
|
1418 |
|
|
|
1419 |
|
|
#
|
1420 |
|
|
# period definition
|
1421 |
|
|
#
|
1422 |
|
|
# for each service there can be one or more alert periods
|
1423 |
|
|
# this is stored as an array of hashes named
|
1424 |
|
|
# %{$watch{$watchgroup}->{$service}->{"periods"}}
|
1425 |
|
|
# each index for this hash is a unique tag for the period as
|
1426 |
|
|
# defined by the user or named after the period (such as
|
1427 |
|
|
# "wd {Mon-Fri} hr {7am-11pm}")
|
1428 |
|
|
#
|
1429 |
|
|
# the value of the hash is an array containing the list of alert commands
|
1430 |
|
|
# and arguments, so
|
1431 |
|
|
#
|
1432 |
|
|
# @alerts = @{$watch{$watchgroup}->{$service}->{"periods"}->{"TAG"}}
|
1433 |
|
|
#
|
1434 |
|
|
if ($var eq "period")
|
1435 |
|
|
{
|
1436 |
|
|
$period = 1;
|
1437 |
|
|
|
1438 |
|
|
my $periodstr;
|
1439 |
|
|
|
1440 |
|
|
if ($args =~ /^([a-z_]\w*) \s* : \s* (.*)$/ix)
|
1441 |
|
|
{
|
1442 |
|
|
$periodstr = $1;
|
1443 |
|
|
$args = $2;
|
1444 |
|
|
}
|
1445 |
|
|
|
1446 |
|
|
else
|
1447 |
|
|
{
|
1448 |
|
|
$periodstr = $args;
|
1449 |
|
|
}
|
1450 |
|
|
|
1451 |
|
|
if (exists $sref->{"periods"}->{$periodstr})
|
1452 |
|
|
{
|
1453 |
|
|
close (CFG);
|
1454 |
|
|
return "cf error: period '$periodstr' already defined for watch group $watchgroup service $service, line $line_num";
|
1455 |
|
|
}
|
1456 |
|
|
|
1457 |
|
|
$pref = \%{$sref->{"periods"}->{$periodstr}};
|
1458 |
|
|
|
1459 |
|
|
if (inPeriod (time, $args) == -1)
|
1460 |
|
|
{
|
1461 |
|
|
close (CFG);
|
1462 |
|
|
return "cf error: malformed period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
|
1463 |
|
|
}
|
1464 |
|
|
|
1465 |
|
|
$pref->{"period"} = $args;
|
1466 |
|
|
$pref->{"alertevery"} = 0;
|
1467 |
|
|
$pref->{"numalerts"} = 0;
|
1468 |
|
|
$pref->{"_alert_sent"} = 0;
|
1469 |
|
|
$pref->{"no_comp_alerts"} = 0;
|
1470 |
|
|
$pref->{"_no_comp_alerts_upalert_sent"} = 0;
|
1471 |
|
|
@{$pref->{"alerts"}} = ();
|
1472 |
|
|
@{$pref->{"upalerts"}} = ();
|
1473 |
|
|
@{$pref->{"ackalerts"}} = ();
|
1474 |
|
|
@{$pref->{"disablealerts"}} = ();
|
1475 |
|
|
@{$pref->{"startupalerts"}} = ();
|
1476 |
|
|
next;
|
1477 |
|
|
}
|
1478 |
|
|
|
1479 |
|
|
#
|
1480 |
|
|
# period variables
|
1481 |
|
|
#
|
1482 |
|
|
if ($period)
|
1483 |
|
|
{
|
1484 |
|
|
if ($var eq "alert")
|
1485 |
|
|
{
|
1486 |
|
|
push @{$pref->{"alerts"}}, $args;
|
1487 |
|
|
}
|
1488 |
|
|
|
1489 |
|
|
elsif ($var eq "ackalert")
|
1490 |
|
|
{
|
1491 |
|
|
push @{$pref->{"ackalerts"}}, $args;
|
1492 |
|
|
}
|
1493 |
|
|
|
1494 |
|
|
elsif ($var eq "disablealert")
|
1495 |
|
|
{
|
1496 |
|
|
push @{$pref->{"disablealerts"}}, $args;
|
1497 |
|
|
}
|
1498 |
|
|
|
1499 |
|
|
elsif ($var eq "upalert")
|
1500 |
|
|
{
|
1501 |
|
|
$sref->{"_upalert"} = 1;
|
1502 |
|
|
push @{$pref->{"upalerts"}}, $args;
|
1503 |
|
|
}
|
1504 |
|
|
|
1505 |
|
|
elsif ($var eq "startupalert")
|
1506 |
|
|
{
|
1507 |
|
|
push @{$pref->{"startupalerts"}}, $args;
|
1508 |
|
|
}
|
1509 |
|
|
|
1510 |
|
|
elsif ($var eq "alertevery")
|
1511 |
|
|
{
|
1512 |
|
|
$pref->{"_observe_detail"} = 0;
|
1513 |
|
|
$pref->{"_alertevery_strict"} = 0;
|
1514 |
|
|
$pref->{"_ignore_summary"} = 0;
|
1515 |
|
|
|
1516 |
|
|
if ($args =~ /(\S+) \s+ observe_detail \s*$/ix)
|
1517 |
|
|
{
|
1518 |
|
|
$pref->{"_observe_detail"} = 1;
|
1519 |
|
|
$args = $1;
|
1520 |
|
|
}
|
1521 |
|
|
|
1522 |
|
|
elsif ($args =~ /(\S+) \s+ ignore_summary \s*$/ix)
|
1523 |
|
|
{
|
1524 |
|
|
$pref->{"_ignore_summary"} = 1;
|
1525 |
|
|
$args = $1;
|
1526 |
|
|
}
|
1527 |
|
|
|
1528 |
|
|
#
|
1529 |
|
|
# for backwards-compatibility with <= 0.38.21
|
1530 |
|
|
#
|
1531 |
|
|
elsif ($args =~ /(\S+) \s+ summary/ix)
|
1532 |
|
|
{
|
1533 |
|
|
$args = $1;
|
1534 |
|
|
}
|
1535 |
|
|
|
1536 |
|
|
#
|
1537 |
|
|
# strict
|
1538 |
|
|
#
|
1539 |
|
|
elsif ($args =~ /(\S+) \s+ strict \s*$/ix)
|
1540 |
|
|
{
|
1541 |
|
|
$pref->{"_alertevery_strict"} = 1;
|
1542 |
|
|
$args = $1;
|
1543 |
|
|
}
|
1544 |
|
|
|
1545 |
|
|
if (!($args = dhmstos ($args))) {
|
1546 |
|
|
close (CFG);
|
1547 |
|
|
return "cf error: invalid time interval '$args' (syntax: alertevery {positive number}{smhd} [ strict | observe_detail | ignore_summary ]), line $line_num";
|
1548 |
|
|
}
|
1549 |
|
|
|
1550 |
|
|
$pref->{"alertevery"} = $args;
|
1551 |
|
|
next;
|
1552 |
|
|
}
|
1553 |
|
|
|
1554 |
|
|
elsif ($var eq "alertafter")
|
1555 |
|
|
{
|
1556 |
|
|
my ($p1, $p2);
|
1557 |
|
|
|
1558 |
|
|
#
|
1559 |
|
|
# alertafter NUM
|
1560 |
|
|
#
|
1561 |
|
|
if ($args =~ /^(\d+)$/)
|
1562 |
|
|
{
|
1563 |
|
|
$p1 = $1;
|
1564 |
|
|
$pref->{"alertafter_consec"} = $p1;
|
1565 |
|
|
}
|
1566 |
|
|
|
1567 |
|
|
#
|
1568 |
|
|
# alertafter timeval
|
1569 |
|
|
#
|
1570 |
|
|
elsif ($args =~ /^(\d+[hms])$/)
|
1571 |
|
|
{
|
1572 |
|
|
$p1 = $1;
|
1573 |
|
|
if (!($p1 = dhmstos ($p1)))
|
1574 |
|
|
{
|
1575 |
|
|
close (CFG);
|
1576 |
|
|
return "cf error: invalid time interval '$args' (syntax: alertafter = [{positive integer}] [{positive number}{smhd}]), line $line_num";
|
1577 |
|
|
}
|
1578 |
|
|
|
1579 |
|
|
$pref->{"alertafterival"} = $p1;
|
1580 |
|
|
$pref->{"_1stfailtime"} = 0;
|
1581 |
|
|
}
|
1582 |
|
|
|
1583 |
|
|
#
|
1584 |
|
|
# alertafter NUM timeval
|
1585 |
|
|
#
|
1586 |
|
|
elsif ($args =~ /(\d+)\s+(\d+[hms])$/)
|
1587 |
|
|
{
|
1588 |
|
|
($p1, $p2) = ($1, $2);
|
1589 |
|
|
if (($p1 - 1) * $sref->{"interval"} >= dhmstos($p2))
|
1590 |
|
|
{
|
1591 |
|
|
close (CFG);
|
1592 |
|
|
return "cf error: interval & alertafter not sensible. No alerts can be generated with those parameters, line $line_num";
|
1593 |
|
|
}
|
1594 |
|
|
$pref->{"alertafter"} = $p1;
|
1595 |
|
|
$pref->{"alertafterival"} = dhmstos ($p2);
|
1596 |
|
|
|
1597 |
|
|
$pref->{"_1stfailtime"} = 0;
|
1598 |
|
|
$pref->{"_failcount"} = 0;
|
1599 |
|
|
}
|
1600 |
|
|
|
1601 |
|
|
else
|
1602 |
|
|
{
|
1603 |
|
|
close (CFG);
|
1604 |
|
|
return "cf error: invalid interval specification '$args', line $line_num";
|
1605 |
|
|
}
|
1606 |
|
|
}
|
1607 |
|
|
|
1608 |
|
|
elsif ($var eq "upalertafter")
|
1609 |
|
|
{
|
1610 |
|
|
if (!($args = dhmstos ($args))) {
|
1611 |
|
|
close (CFG);
|
1612 |
|
|
return "cf error: invalid upalertafter specification '$args' (syntax: upalertafter = {positive number}{smhd}), line $line_num";
|
1613 |
|
|
}
|
1614 |
|
|
|
1615 |
|
|
$pref->{"upalertafter"} = $args;
|
1616 |
|
|
}
|
1617 |
|
|
|
1618 |
|
|
elsif ($var eq "numalerts")
|
1619 |
|
|
{
|
1620 |
|
|
if ($args !~ /^\d+$/) {
|
1621 |
|
|
close (CFG);
|
1622 |
|
|
return "cf error: -numeric arg '$args' (syntax: numalerts = {positive integer}, line $line_num";
|
1623 |
|
|
}
|
1624 |
|
|
$pref->{"numalerts"} = $args;
|
1625 |
|
|
next;
|
1626 |
|
|
}
|
1627 |
|
|
|
1628 |
|
|
elsif ($var eq "no_comp_alerts")
|
1629 |
|
|
{
|
1630 |
|
|
$pref->{"no_comp_alerts"} = 1;
|
1631 |
|
|
next;
|
1632 |
|
|
}
|
1633 |
|
|
|
1634 |
|
|
elsif ($var eq "alerts_dont_count")
|
1635 |
|
|
{
|
1636 |
|
|
$pref->{"alerts_dont_count"} = 1;
|
1637 |
|
|
next;
|
1638 |
|
|
}
|
1639 |
|
|
|
1640 |
|
|
elsif ($var eq 'alertexitrange') {
|
1641 |
|
|
if ($args !~ /^\s*(\d+|\d+-\d+)\s*$/) {
|
1642 |
|
|
close (CFG);
|
1643 |
|
|
return "cf error: invalid exit code range '$args', line $line_num";
|
1644 |
|
|
}
|
1645 |
|
|
$pref->{"alertexitrange"} = $args;
|
1646 |
|
|
}
|
1647 |
|
|
|
1648 |
|
|
else
|
1649 |
|
|
{
|
1650 |
|
|
close (CFG);
|
1651 |
|
|
return "cf error: unknown syntax [$l], line $line_num";
|
1652 |
|
|
}
|
1653 |
|
|
|
1654 |
|
|
}
|
1655 |
|
|
|
1656 |
|
|
#
|
1657 |
|
|
# non-period variables
|
1658 |
|
|
#
|
1659 |
|
|
elsif (!$period)
|
1660 |
|
|
{
|
1661 |
|
|
if ($var eq "interval")
|
1662 |
|
|
{
|
1663 |
|
|
if (!($args = dhmstos ($args))) {
|
1664 |
|
|
close (CFG);
|
1665 |
|
|
return "cf error: invalid time interval '$args' (syntax: interval = {positive number}{smhd}), line $line_num";
|
1666 |
|
|
}
|
1667 |
|
|
}
|
1668 |
|
|
|
1669 |
|
|
elsif ($var eq "failure_interval")
|
1670 |
|
|
{
|
1671 |
|
|
if (!($args = dhmstos ($args))) {
|
1672 |
|
|
close (CFG);
|
1673 |
|
|
return "cf error: invalid interval '$args' (syntax: failure_interval = {positive number}{smhd}), line $line_num";
|
1674 |
|
|
}
|
1675 |
|
|
}
|
1676 |
|
|
|
1677 |
|
|
elsif ($var eq "monitor")
|
1678 |
|
|
{
|
1679 |
|
|
# valid
|
1680 |
|
|
}
|
1681 |
|
|
|
1682 |
|
|
elsif ($var eq "redistribute")
|
1683 |
|
|
{
|
1684 |
|
|
# valid
|
1685 |
|
|
}
|
1686 |
|
|
|
1687 |
|
|
elsif ($var eq "allow_empty_group")
|
1688 |
|
|
{
|
1689 |
|
|
# valid
|
1690 |
|
|
}
|
1691 |
|
|
|
1692 |
|
|
elsif ($var eq "description")
|
1693 |
|
|
{
|
1694 |
|
|
# valid
|
1695 |
|
|
}
|
1696 |
|
|
|
1697 |
|
|
elsif ($var eq "unack_summary")
|
1698 |
|
|
{
|
1699 |
|
|
if (defined $args) {
|
1700 |
|
|
if ($args =~ /y(es)?/i) {
|
1701 |
|
|
$args = 1;
|
1702 |
|
|
} elsif ($args =~ /n(o)?/i) {
|
1703 |
|
|
$args = 0;
|
1704 |
|
|
}
|
1705 |
|
|
if ($args eq "0" || $args eq "1") {
|
1706 |
|
|
$sref->{"unack_summary"} = $args;
|
1707 |
|
|
} else {
|
1708 |
|
|
return "cf error: invalid unack_summary value '$args' (syntax: unack_summary [0|1|y|yes|n|no])";
|
1709 |
|
|
}
|
1710 |
|
|
} else {
|
1711 |
|
|
$sref->{"unack_summary"} = 1;
|
1712 |
|
|
}
|
1713 |
|
|
next;
|
1714 |
|
|
}
|
1715 |
|
|
|
1716 |
|
|
elsif ($var eq "traptimeout")
|
1717 |
|
|
{
|
1718 |
|
|
if (!($args = dhmstos ($args))) {
|
1719 |
|
|
close (CFG);
|
1720 |
|
|
return "cf error: invalid traptimeout interval '$args' (syntax: traptimeout = {positive number}{smhd}), line $line_num";
|
1721 |
|
|
}
|
1722 |
|
|
$sref->{"_trap_timer"} = $args;
|
1723 |
|
|
}
|
1724 |
|
|
|
1725 |
|
|
elsif ($var eq "trapduration")
|
1726 |
|
|
{
|
1727 |
|
|
if (!($args = dhmstos ($args))) {
|
1728 |
|
|
close (CFG);
|
1729 |
|
|
return "cf error: invalid trapduration interval '$args' (syntax: trapduration = {positive number}{smhd}), line $line_num";
|
1730 |
|
|
}
|
1731 |
|
|
}
|
1732 |
|
|
|
1733 |
|
|
elsif ($var eq "randskew")
|
1734 |
|
|
{
|
1735 |
|
|
if (!($args = dhmstos ($args))) {
|
1736 |
|
|
close (CFG);
|
1737 |
|
|
return "cf error: invalid randskew time interval '$args' (syntax: randskew = {positive number}{smhd}), line $line_num";
|
1738 |
|
|
}
|
1739 |
|
|
}
|
1740 |
|
|
|
1741 |
|
|
elsif ($var eq "dep_behavior")
|
1742 |
|
|
{
|
1743 |
|
|
if ($args ne "m" && $args ne "a" && $args ne "hm")
|
1744 |
|
|
{
|
1745 |
|
|
close (CFG);
|
1746 |
|
|
return "cf error: unknown dependency behavior '$args' (syntax: dep_behavior = {m|a}), line $line_num";
|
1747 |
|
|
}
|
1748 |
|
|
}
|
1749 |
|
|
|
1750 |
|
|
elsif ($var eq "dep_memory")
|
1751 |
|
|
{
|
1752 |
|
|
my $timeval = dhmstos($args);
|
1753 |
|
|
if (!$timeval) {
|
1754 |
|
|
close (CFG);
|
1755 |
|
|
return "cf error: bad value '$args' for dep_memory option (syntax: dep_memory = timeval), line $line_num";
|
1756 |
|
|
}
|
1757 |
|
|
$args = $timeval;
|
1758 |
|
|
}
|
1759 |
|
|
|
1760 |
|
|
elsif ($var eq "depend")
|
1761 |
|
|
{
|
1762 |
|
|
$args =~ s/SELF:/$watchgroup:/g;
|
1763 |
|
|
}
|
1764 |
|
|
|
1765 |
|
|
elsif ($var eq "alertdepend")
|
1766 |
|
|
{
|
1767 |
|
|
$args =~ s/SELF:/$watchgroup:/g;
|
1768 |
|
|
}
|
1769 |
|
|
|
1770 |
|
|
elsif ($var eq "monitordepend")
|
1771 |
|
|
{
|
1772 |
|
|
$args =~ s/SELF:/$watchgroup:/g;
|
1773 |
|
|
}
|
1774 |
|
|
|
1775 |
|
|
elsif ($var eq "hostdepend")
|
1776 |
|
|
{
|
1777 |
|
|
$args =~ s/SELF:/$watchgroup:/g;
|
1778 |
|
|
}
|
1779 |
|
|
|
1780 |
|
|
elsif ($var eq "exclude_hosts")
|
1781 |
|
|
{
|
1782 |
|
|
my $ex = {};
|
1783 |
|
|
foreach my $h (split (/\s+/, $args))
|
1784 |
|
|
{
|
1785 |
|
|
$ex->{$h} = 1;
|
1786 |
|
|
}
|
1787 |
|
|
$args = $ex;
|
1788 |
|
|
}
|
1789 |
|
|
|
1790 |
|
|
elsif ($var eq "exclude_period")
|
1791 |
|
|
{
|
1792 |
|
|
if (inPeriod (time, $args) == -1)
|
1793 |
|
|
{
|
1794 |
|
|
close (CFG);
|
1795 |
|
|
return "cf error: malformed exclude_period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
|
1796 |
|
|
}
|
1797 |
|
|
}
|
1798 |
|
|
|
1799 |
|
|
else
|
1800 |
|
|
{
|
1801 |
|
|
close (CFG);
|
1802 |
|
|
return "cf error: unknown syntax [$l], line $line_num";
|
1803 |
|
|
}
|
1804 |
|
|
|
1805 |
|
|
$sref->{$var} = $args;
|
1806 |
|
|
}
|
1807 |
|
|
|
1808 |
|
|
else
|
1809 |
|
|
{
|
1810 |
|
|
close (CFG);
|
1811 |
|
|
return "cf error: unknown syntax outside of period section [$l], line $line_num";
|
1812 |
|
|
}
|
1813 |
|
|
}
|
1814 |
|
|
|
1815 |
|
|
next;
|
1816 |
|
|
}
|
1817 |
|
|
|
1818 |
|
|
close (CFG) || return "Could not open pipe to m4 (check that m4 is properly installed and in your PATH): $!";
|
1819 |
|
|
|
1820 |
|
|
#
|
1821 |
|
|
# Go through each defined hostgroup and check that there is a
|
1822 |
|
|
# watch associated with that hostgroup record.
|
1823 |
|
|
#
|
1824 |
|
|
# hostgroups without associated watches are not a violation of
|
1825 |
|
|
# mon config syntax, but it's usually not what you want.
|
1826 |
|
|
#
|
1827 |
|
|
for (keys(%new_watch)) { $is_watch{$_} = 1 };
|
1828 |
|
|
foreach $watchgroup ( keys (%new_groups) ) {
|
1829 |
|
|
print STDERR "Warning: hostgroup $watchgroup has no watch assigned to it!\n" unless $is_watch{$watchgroup};
|
1830 |
|
|
}
|
1831 |
|
|
|
1832 |
|
|
#
|
1833 |
|
|
# no errors, commit new config if $commit was specified
|
1834 |
|
|
#
|
1835 |
|
|
return "" unless $commit;
|
1836 |
|
|
%views = %new_views;
|
1837 |
|
|
%alias = %new_alias;
|
1838 |
|
|
%groups = %new_groups;
|
1839 |
|
|
%watch = %new_watch;
|
1840 |
|
|
%CF = %new_CF;
|
1841 |
|
|
|
1842 |
|
|
"";
|
1843 |
|
|
}
|
1844 |
|
|
|
1845 |
|
|
|
1846 |
|
|
#
|
1847 |
|
|
# convert a string like "20m" into seconds
|
1848 |
|
|
#
|
1849 |
|
|
sub dhmstos {
    #
    # Turn a duration such as "20m", "1.5h", "3d" or "45s" into seconds.
    # The unit letter is matched case-insensitively and whitespace
    # around the value is ignored.
    #
    # Returns the number of seconds, or undef when the argument is not
    # a (possibly fractional) number followed by one of d, h, m or s.
    #
    my ($str) = @_;

    #
    # one converter per unit; a value given in seconds is handed back
    # exactly as it was captured
    #
    my %to_seconds = (
        "s" => sub { $_[0] },
        "m" => sub { $_[0] * 60 },
        "h" => sub { $_[0] * 60 * 60 },
        "d" => sub { $_[0] * 60 * 60 * 24 },
    );

    my $spec = lc ($str);

    return undef if ($spec !~ /^\s*(\d+(?:\.\d+)?)([dhms])\s*$/i);

    my ($amount, $unit) = ($1, $2);
    my $secs = $to_seconds{$unit}->($amount);

    $secs;
}
|
1870 |
|
|
|
1871 |
|
|
|
1872 |
|
|
#
|
1873 |
|
|
# reset the state of the server on SIGHUP, and reread config
|
1874 |
|
|
# file.
|
1875 |
|
|
#
|
1876 |
|
|
sub reset_server {
    #
    # Stop all running monitor children, re-read the configuration file
    # named by $CF{"CF"} and re-initialize the server state.
    #
    # $keepstate - when true, save_state("all") is called before the
    #              configuration is re-read and load_state("all") after
    #
    # Returns 1 on success, undef when read_cf reports an error.
    #
    my ($keepstate) = @_;

    #
    # reap children that may be running: send signal 15 to each one
    # and wait for it to exit before dropping it from our tables
    #
    foreach my $pid (keys %runningpid) {
        kill 15, $pid;
        waitpid ($pid, 0);
        syslog ('info', "%s", "reset killed child $pid, exit status $?");
        remove_proc ($pid);
    }

    $procs = 0;
    save_state ("all") if ($keepstate);

    #
    # The text is passed as an argument to a "%s" format rather than as
    # the format itself, so a "%" in the config path or in the parser's
    # error text (which can quote config lines) is logged literally.
    #
    syslog ('info', "%s", "resetting, and re-reading configuration $CF{CF}");

    if ((my $err = read_cf ($CF{"CF"}, 1)) ne "") {
        syslog ('err', "%s", "error reading config file: $err");
        return undef;
    }

    normalize_paths ();
    gen_scriptdir_hash ();
    $lasttm=time;			# the last time(2) the loop started
    $fdset_rbits = $fdset_ebits = '';
    set_last_test ();
    randomize_startdelay() if ($CF{"RANDSTART"});
    load_state ("all") if ($keepstate);
    if ($CF{"DTLOGGING"}) {
        init_dtlog();
    }

    readhistoricfile ();

    if ($CF{"STARTUPALERTS_ON_RESET"}) {
        do_startup_alerts ();
    }

    return 1;
}
|
1918 |
|
|
|
1919 |
|
|
|
1920 |
|
|
sub init_dtlog {
    #
    # Append a start-of-log header to the downtime log $CF{"DTLOGFILE"}.
    #
    # Does nothing unless $CF{"DTLOGGING"} is true.  If the file cannot
    # be opened for appending, the error is sent to syslog and
    # $CF{"DTLOGGING"} is set to 0; otherwise it is set to 1.
    #
    my $t = time;

    return if (!$CF{"DTLOGGING"});

    #
    # three-argument open on a lexical handle: the configured file name
    # is used exactly as given and is never parsed for mode characters
    #
    my $dtlog;

    if (!open ($dtlog, '>>', $CF{"DTLOGFILE"})) {
        syslog ('err', "%s", "could not append to $CF{DTLOGFILE}: $!");
        $CF{"DTLOGGING"} = 0;
    } else {
        $CF{"DTLOGGING"} = 1;
        print {$dtlog} <<EOF;
#
# downtime log start $t
# time back up, group, service, first failure, downtime, interval, summary
#
EOF
        #
        # buffered write errors only show up at close time, so report them
        #
        close ($dtlog) ||
            syslog ('err', "%s", "could not write to $CF{DTLOGFILE}: $!");
    }
}
|
1939 |
|
|
|
1940 |
|
|
|
1941 |
|
|
#
|
1942 |
|
|
# remove a process from our state
|
1943 |
|
|
#
|
1944 |
|
|
sub remove_proc {
    #
    # Forget about the child process $pid: clear its bit in
    # $fdset_rbits, close its filehandle, drop its entries from
    # %fhandles, %running and %runningpid, and decrement $procs.
    # A pid that is not present in %runningpid is ignored.
    #
    my ($pid) = @_;

    return unless (defined $runningpid{$pid});

    # the value stored under the pid is the key shared by
    # %fhandles and %running
    my $tag = $runningpid{$pid};

    vec ($fdset_rbits, fileno ($fhandles{$tag}), 1) = 0;
    close ($fhandles{$tag});

    delete $fhandles{$tag};
    delete $running{$tag};
    delete $runningpid{$pid};

    $procs--;
}
|
1956 |
|
|
|
1957 |
|
|
|
1958 |
|
|
#
|
1959 |
|
|
# exit on SIGTERM
|
1960 |
|
|
#
|
1961 |
|
|
sub handle_sigterm {
    #
    # record the signal in syslog at "info" priority, then terminate
    # the process with exit status 1
    #
    syslog ('info', 'caught TERM signal, exiting');
    exit 1;
}
|
1965 |
|
|
|
1966 |
|
|
|
1967 |
|
|
#
|
1968 |
|
|
# set O_NONBLOCK and FD_CLOEXEC on the given filehandle
|
1969 |
|
|
#
|
1970 |
|
|
sub configure_filehandle {
    #
    # Switch the filehandle $fh to non-blocking mode (O_NONBLOCK) and
    # mark it close-on-exec (FD_CLOEXEC).
    #
    # Returns 1 when all four fcntl calls succeed; returns nothing as
    # soon as one of them fails.
    #
    my ($fh) = @_;

    #
    # file status flags: read them, add O_NONBLOCK, write them back
    #
    my $flags = '';
    $flags = fcntl ($fh, F_GETFL, $flags) or return;
    fcntl ($fh, F_SETFL, $flags | O_NONBLOCK) or return;

    #
    # descriptor flags: read them, add FD_CLOEXEC, write them back
    #
    $flags = fcntl ($fh, F_GETFD, 0) or return;
    fcntl ($fh, F_SETFD, $flags | FD_CLOEXEC) or return;

    return 1;
}
|
1985 |
|
|
|
1986 |
|
|
|
1987 |
|
|
#
|
1988 |
|
|
# setup server
|
1989 |
|
|
#
|
1990 |
|
|
sub setup_server {
    #
    # Create the two listening sockets of the server:
    #
    #   SERVER      TCP, bound to $CF{"SERVPORT"} on $CF{"SERVERBIND"}
    #   TRAPSERVER  UDP, bound to $CF{"TRAPPORT"} on $CF{"TRAPBIND"}
    #
    # When a bind address is not configured, INADDR_ANY is used.  Both
    # handles are passed through configure_filehandle.  Every failure
    # is reported through die_die.
    #
    my ($tcpproto, $udpproto);

    if (!defined ($tcpproto = getprotobyname ('tcp')))
    {
        die_die ("err", "could not get protocol for tcp");
    }

    if (!defined ($udpproto = getprotobyname ('udp')))
    {
        die_die ("err", "could not get protocol for udp");
    }

    #
    # client server, such as moncmd
    #
    my $bindaddr;
    if (defined $CF{"SERVERBIND"})
    {
        # on failure the gethost* functions report h_errno through $?
        if (!($bindaddr = gethostbyname ($CF{"SERVERBIND"})))
        {
            die_die ("err", "error returned by gethostbyname for serverbind: $?");
        }
    }

    else
    {
        $bindaddr = INADDR_ANY;
    }

    socket (SERVER, PF_INET, SOCK_STREAM, $tcpproto) ||
        die_die ("err", "could not create TCP socket: $!");

    setsockopt (SERVER, SOL_SOCKET, SO_REUSEADDR, pack ("l", 1)) ||
        die_die ("err", "could not setsockopt: $!");

    bind (SERVER, sockaddr_in ($CF{"SERVPORT"}, $bindaddr)) ||
        die_die ("err", "could not bind TCP server port $CF{'SERVPORT'}: $!");

    # a socket that cannot listen will never accept a client, so this
    # is reported like every other setup failure
    listen (SERVER, SOMAXCONN) ||
        die_die ("err", "could not listen on TCP server port $CF{'SERVPORT'}: $!");

    configure_filehandle (*SERVER) ||
        die_die ("err", "could not configure TCP server port: $!");

    #
    # remote monitor traps
    #
    if (defined $CF{"TRAPBIND"})
    {
        if (!($bindaddr = gethostbyname ($CF{"TRAPBIND"})))
        {
            die_die ("err", "error returned by gethostbyname for trapbind: $?");
        }
    }

    else
    {
        $bindaddr = INADDR_ANY;
    }

    socket (TRAPSERVER, PF_INET, SOCK_DGRAM, $udpproto) ||
        die_die ("err", "could not create UDP socket: $!");
    bind (TRAPSERVER, sockaddr_in ($CF{"TRAPPORT"}, $bindaddr)) ||
        die_die ("err", "could not bind UDP server port: $!");
    configure_filehandle (*TRAPSERVER) ||
        die_die ("err", "could not configure UDP trap port: $!");
}
|
2057 |
|
|
|
2058 |
|
|
|
2059 |
|
|
#
|
2060 |
|
|
# set up a client connection if necessary
|
2061 |
|
|
#
|
2062 |
|
|
sub client_accept {
    #
    # Accept one pending connection on the SERVER socket and, if the
    # peer address matches one of the patterns in $CF{"CLIENTALLOW"},
    # register it in %clients keyed by its file descriptor number.
    #
    # Takes no arguments and returns nothing useful. A connection that
    # cannot be accepted, cannot be configured, or is not allowed is
    # logged and dropped.
    #
    my ($sock, $port, $addr);

    my $CLIENT = FileHandle->new;

    if (!defined ($sock = accept ($CLIENT, SERVER))) {
        syslog ('err', "accept returned error: $!");
        return;
    }

    debug(1, "accepted client $CLIENT\n");
    my $fno = fileno ($CLIENT);

    #
    # set socket to nonblocking
    #
    if (!configure_filehandle ($CLIENT)) {
        syslog ("err", "could not configure for client: $!");
        close ($CLIENT);
        return;
    }

    ($port, $addr) = unpack_sockaddr_in ($sock);
    my $clientip = inet_ntoa($addr);

    syslog ('info', "client connection from $clientip:$port");

    my @clientregex = split(' ', $CF{"CLIENTALLOW"});
    my $ipok = 0;

    foreach my $ippattern (@clientregex) {
        #
        # change all periods, except those preceded by [ or \, into \.
        #
        $ippattern =~ s/([^[\\])\./$1\\./g;

        #
        # the pattern is grouped so that both anchors apply to all of
        # it, even when it contains a top-level alternation
        #
        if ($clientip =~ /^(?:${ippattern})$/) {
            $ipok = 1;
            last;
        }
    }

    if (!$ipok) {
        syslog('notice', "closing unwanted client: $clientip");
        close($CLIENT);
        return;
    }

    #
    # unbuffered output to the client, without disturbing whichever
    # filehandle is currently selected
    #
    $CLIENT->autoflush (1);

    $clients{$fno}->{"host"} = $clientip;
    $clients{$fno}->{"fhandle"} = $CLIENT;
    $clients{$fno}->{"user"} = undef;		# username if authenticated
    $clients{$fno}->{"timeout"} = $CF{"CLIENT_TIMEOUT"};
    $clients{$fno}->{"last_read"} = time;	# last time data was read
    $clients{$fno}->{"buf"} = '';
    $numclients++;
}
|
2125 |
|
|
|
2126 |
|
|
|
2127 |
|
|
#
|
2128 |
|
|
# do all pending client commands
|
2129 |
|
|
#
|
2130 |
|
|
sub client_dopending {
    #
    # For every entry in %clients whose input buffer starts with a
    # complete line, remove that line (and its CR/LF terminators) from
    # the buffer and hand it to client_command. At most one line per
    # client is processed on each call.
    #
    for my $id (keys %clients) {
        # nothing to do until a line terminator has arrived
        next unless $clients{$id}->{"buf"} =~ /^([^\r\n]*)[\r\n]+/s;

        my $line = $1;
        $clients{$id}->{"buf"} =~ s/^[^\r\n]*[\r\n]+//s;

        client_command ($id, $line);
    }
}
|
2142 |
|
|
|
2143 |
|
|
|
2144 |
|
|
#
|
2145 |
|
|
# close a client connection
|
2146 |
|
|
#
|
2147 |
|
|
sub client_close {
    #
    # Close the connection of client $cl and forget about it: the
    # filehandle is closed, the %clients entry is deleted, the
    # client's bit in $iovec is cleared and $numclients is decremented.
    #
    #   $cl      key of the client's entry in %clients
    #   $reason  optional text; when defined, the close is logged
    #
    # Dies if no filehandle is recorded for $cl.
    #
    my ($cl, $reason) = @_;

    # the text is passed as data, never as the syslog format string
    syslog ('info', '%s', "closing client $cl: $reason") if (defined $reason);

    die "client_close: no filehandle recorded for client $cl"
        if !defined ($clients{$cl}->{"fhandle"});

    close ($clients{$cl}->{"fhandle"});
    delete $clients{$cl};
    vec ($iovec, $cl, 1) = 0;
    $numclients--;
}
|
2157 |
|
|
|
2158 |
|
|
|
2159 |
|
|
#
|
2160 |
|
|
# Handle a connection from a client
|
2161 |
|
|
#
|
2162 |
|
|
sub client_command {
    #
    # Execute one command line received from a client.
    #
    #   $cl  key of the client's entry in %clients
    #   $l   the command line, without its line terminator
    #
    # Replies are written to the client's filehandle with sock_write.
    # Most commands finish with a "220 ..." line on success or a
    # "5xx ..." line on failure. Commands other than quit, dump,
    # protid, login, version, setview, getview and checkauth are only
    # executed when check_auth accepts them for the client's user.
    # When an enable/disable command changed something, the "disabled"
    # state is saved before returning.
    #
    my ($cl, $l) = @_;
    my ($cmd, $args, $group, $service, $stchanged);
    my ($var, $value, @l, $fh);
    my ($user, $pass, @argsList);
    my ($authtype, @authtypes);
    my $is_auth = 0;	#flag for multiple auth types

    #
    # $l comes from the client, so it is handed to syslog as data and
    # never as the format string. login lines are not logged because
    # they carry the password.
    #
    syslog ('info', '%s', "client command \"$l\"")
        if ($l !~ /^\s*login/i);

    $fh = $clients{$cl}->{"fhandle"};

    if ($l !~ /^(dump|login|disable|enable|quit|list|set|get|setview|getview|
                 stop|start|loadstate|savestate|reset|clear|checkauth|
                 reload|term|test|servertime|ack|version|protid)(\s+(.*))?$/ix) {
        sock_write ($fh, "520 invalid command\n");
        return;
    }
    ($cmd, $args) = ("\L$1", $3);

    $stchanged = 0;

    # trace to STDERR; the arguments of login (user and password) are withheld
    print STDERR "client command $cmd\nclient args ",
        ($cmd eq "login" ? "(not shown)" : $args), "\n";

    #
    # quit command
    #
    if ($cmd eq "quit") {
        sock_write ($fh, "220 quitting\n");
        client_close ($cl);

    } elsif ($opt{"d"} && $cmd eq "dump") {
        print STDERR Dumper (\%watch), "\n\n";

    #
    # protocol identification
    #
    } elsif ($cmd eq "protid") {
        if ($args != int ($PROT_VERSION)) {
            sock_write ($fh, "520 protocol mismatch\n");
        } else {
            sock_write ($fh, "220 protocol match\n");
        }

    #
    # login
    #
    } elsif ($cmd eq "login") {
        ($user, $pass) = split (/\s+/, $args, 2);
        @authtypes = split(' ' , $CF{"AUTHTYPE"});

        # Check each form of authentication in order, and stop checking
        # as soon as we get a positive authentication result.
        foreach $authtype (@authtypes) {
            if (defined auth ($authtype, $user, $pass, $clients{$cl}->{"host"})) {
                $is_auth = 1;
                last;
            }
        }

        if ($is_auth != 1) {
            sock_write ($fh, "530 login unsuccessful\n");
        } else {
            $clients{$cl}->{"user"} = $user;
            syslog ("info", '%s', "authenticated $user");
            sock_write ($fh, "220 login accepted\n");
        }

    #
    # reset
    #
    } elsif ($cmd eq "reset" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($keepstate);
        if ($args =~ /stopped/i) {
            $STOPPED = 1;
            $STOPPED_TIME = time;
        }

        if ($args =~ /keepstate/) {
            $keepstate = 1;
        }

        if (reset_server ($keepstate)) {
            sock_write ($fh, "220 reset PID $$\@$HOSTNAME\n");
        } else {
            sock_write ($fh, "520 reset PID $$\@$HOSTNAME failed, error in config file\n");
        }

    #
    # reload
    #
    } elsif ($cmd eq "reload" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if (!defined reload (split (/\s+/, $args))) {
            sock_write ($fh, "520 unknown reload command\n");
        } else {
            sock_write ($fh, "220 reload completed\n");
        }

    #
    # clear
    #
    } elsif ($cmd eq "clear" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /^timers \s+ ([a-zA-Z0-9_.-]+) \s+ ([a-zA-Z0-9_.-]+)/ix) {
            # copy the captures before anything else can run a regex
            my ($tgroup, $tservice) = ($1, $2);

            if (!defined $watch{$tgroup}->{$tservice}) {
                sock_write ($fh, "520 unknown group\n");
            } else {
                clear_timers ($tgroup, $tservice);
                sock_write ($fh, "220 clear timers completed\n");
            }

        } else {
            sock_write ($fh, "520 unknown clear command\n");
        }

    #
    # test
    #
    } elsif ($cmd eq "test" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($testcmd, $testargs) = split (/\s+/, $args, 2);

        #
        # test monitor
        #
        if ($testcmd eq "monitor") {
            my ($group, $service) = split (/\s+/, $testargs);

            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "$group $service not defined\n");
            } else {
                # zero the timers kept for this service
                $watch{$group}->{$service}->{"_timer"} = 0;
                $watch{$group}->{$service}->{"_next_check"} = 0;
                mysystem("$CF{MONREMOTE} test $group $service") if ($CF{MONREMOTE});
            }
            sock_write ($fh, "220 test monitor completed\n");

        #
        # test alert
        #
        # the alternation is grouped so that both anchors apply to
        # every alert type
        #
        } elsif ($testcmd =~ /^(?:alert|startupalert|upalert|ackalert|disablealert)$/) {
            my ($group, $service, $retval, $period) = split (/\s+/, $testargs, 4);

            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "520 $group $service not defined\n");

            } elsif (!defined $watch{$group}->{$service}->{"periods"}->{$period}) {
                sock_write ($fh, "520 period not defined\n");

            } else {
                my $f = 0;
                my $alerts;
                my $pref = $watch{$group}->{$service}->{"periods"}->{$period};

                if ($testcmd eq "alert") {
                    $alerts = $pref->{"alerts"};
                } elsif ($testcmd eq "startupalert") {
                    $f = $FL_STARTUPALERT;
                    $alerts = $pref->{"startupalerts"};
                } elsif ($testcmd eq "upalert") {
                    $f = $FL_UPALERT;
                    $alerts = $pref->{"upalerts"};
                } elsif ($testcmd eq "ackalert") {
                    $f = $FL_ACKALERT;
                    $alerts = $pref->{"ackalerts"};
                } elsif ($testcmd eq "disablealert") {
                    $f = $FL_DISABLEALERT;
                    $alerts = $pref->{"disablealerts"};
                }

                for (@{$alerts}) {
                    my ($alert, $alert_args) = split (/\s+/, $_, 2);

                    # a leading "exit=..." word is not passed on to the alert
                    if ($alert_args =~ /^exit=/) {
                        $alert_args =~ s/^exit=\S+ \s+//x;
                    }

                    call_alert (
                        group => $group,
                        service => $service,
                        output => "test\ntest detail\n",
                        retval => $retval,
                        flags => $f | $FL_TEST,
                        alert => $alert,
                        args => $alert_args,
                    );
                }

                sock_write ($fh, "220 test alert completed\n");
            }

        #
        # test config file
        #
        } elsif ($testcmd =~ /^config$/) {
            if ((my $err = read_cf ($CF{"CF"}, 0)) ne "") {
                sock_write ($fh, $err);
                sock_write ($fh, "\n520 test config completed, errors found in config file\n");
            } else {
                sock_write ($fh, "220 test config completed OK, no errors found\n");
            }

        } else {
            sock_write ($fh, "520 test error\n");
        }

    #
    # version
    #
    } elsif ($cmd eq "version") {
        sock_write ($fh, "version " . int ($PROT_VERSION) . "\n");
        sock_write ($fh, "220 version completed\n");

    #
    # load state
    #
    } elsif ($cmd eq "loadstate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        foreach (split (/\s+/, $args)) {
            load_state ($_);
        }
        sock_write ($fh, "220 loadstate completed\n");

    #
    # save state
    #
    } elsif ($cmd eq "savestate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /\S/) {
            foreach (split (/\s+/, $args)) {
                save_state ($_);
            }
            sock_write ($fh, "220 savestate completed\n");
        } else {
            sock_write ($fh, "520 savestate error, arguments required\n");
        }

    #
    # term
    #
    } elsif ($cmd eq "term" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        sock_write ($fh, "220 terminating server\n");
        client_close ($cl, "terminated by user command");
        syslog ("info", "terminating by user command");
        exit;

    #
    # stop testing
    #
    } elsif ($cmd eq "stop" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        $STOPPED = 1;
        $STOPPED_TIME = time;
        sock_write ($fh, "220 stop completed\n");

    #
    # start testing
    #
    } elsif ($cmd eq "start" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        $STOPPED = 0;
        $STOPPED_TIME = 0;
        sock_write ($fh, "220 start completed\n");

    #
    # select the view used by this client's list commands
    #
    } elsif ($cmd eq "setview") {
        my @args = split /\s+/, $args;
        if (@args > 1) {
            sock_write($fh, "500 Unknown setview command\n");
        } elsif (@args == 1) {
            if (defined($views{$args[0]})) {
                $clients{$cl}->{"view"} = $args[0];
                sock_write($fh, "selecting view $args[0]\n");
                sock_write($fh, "220 setview completed\n");
            } else {
                sock_write($fh, "504 unknown view $args[0]\n");
            }
        } else {
            # no argument: go back to showing everything
            delete $clients{$cl}->{"view"};
            sock_write($fh, "no view selected -- all groups will be displayed\n");
            sock_write($fh, "220 setview completed\n");
        }

    #
    # report the currently selected view
    #
    } elsif ($cmd eq "getview") {
        if ($clients{$cl}->{"view"}) {
            sock_write($fh, "view ".$clients{$cl}->{"view"}. " selected\n");
        } else {
            sock_write($fh, "no view selected -- all groups will be displayed\n");
        }
        sock_write($fh, "220 getview completed\n");

    #
    # set
    #
    } elsif ($cmd eq "set" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /^maxkeep\s+(\d+)/) {
            $CF{"MAX_KEEP"} = $1;
            sock_write ($fh, "220 set completed\n");
        } else {
            ($group, $service, $var, $value) = split (/\s+/, $args, 4);
            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "520 $group,$service not defined\n");
            } elsif ($var eq "opstatus") {
                if (!defined ($OPSTAT{$value})) {
                    sock_write ($fh, "520 undefined opstatus\n");
                } else {
                    set_op_status ($group, $service,
                        un_esc_str ((parse_line ('\s+', 0, $value))[0]));
                    sock_write ($fh, "220 set completed\n");
                }

            } else {
                $value = un_esc_str ((parse_line ('\s+', 0, $value))[0]);
                $watch{$group}->{$service}->{$var} = $value;
                sock_write ($fh, "$group $service $var='$value'\n");
                sock_write ($fh, "220 set completed\n");
            }
        }

    #
    # get
    #
    } elsif ($cmd eq "get" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        if ($args =~ /^maxkeep\s*$/) {
            sock_write ($fh, "maxkeep = $CF{MAX_KEEP}\n");
            sock_write ($fh, "220 get completed\n");
        } else {
            ($group, $service, $var) = split (/\s+/, $args, 3);
            if (!defined $watch{$group}->{$service}) {
                sock_write ($fh, "520 $group,$service not defined\n");
            } else {
                sock_write ($fh, "$group $service $var='" .
                    esc_str ($watch{$group}->{$service}->{$var}, 1) . "'\n");
                sock_write ($fh, "220 get completed\n");
            }
        }

    #
    # list
    #
    } elsif ($cmd eq "list" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        @argsList = split(/\s+/, $args);

        # from here on $cmd is the list subcommand and $args its arguments
        ($cmd, $args) = split (/\s+/, $args, 2);

        #
        # list service descriptions
        #
        if ($cmd eq "descriptions") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
                        sock_write ($fh, "$group $service " .
                            esc_str ($watch{$group}->{$service}->{"description"}, 1) .
                            "\n");
                    }
                }
            }
            sock_write ($fh, "220 list descriptions completed\n");

        #
        # list group members
        #
        } elsif ($cmd eq "group") {
            if ($groups{$args}) {
                sock_write ($fh, "hostgroup $args @{$groups{$args}}\n");
                sock_write ($fh, "220 list group completed\n");
            } else {
                sock_write ($fh, "520 list group error, undefined group\n");
            }

        #
        # list status of all services
        #
        } elsif ($cmd eq "opstatus") {
            if (!defined $args || $args eq "") {
                foreach $group (keys %watch) {
                    foreach $service (keys %{$watch{$group}}) {
                        if (view_match($clients{$cl}->{"view"}, $group, $service)) {
                            client_write_opstatus ($fh, $group, $service);
                        }
                    }
                }
                sock_write ($fh, "220 list opstatus completed\n");

            } else {
                my $err = 0;
                my @g = ();
                my ($group, $service);

                #
                # each argument is "group" or "group,service"; stop at
                # the first service that does not exist so that the
                # error reply names it
                #
                foreach my $gs (split (/\s+/, $args)) {
                    ($group, $service) = split (/,/, $gs);
                    if ($service ne "" && !defined $watch{$group}->{$service}) {
                        $err++;
                        last;
                    }
                    push (@g, [$group, $service]);
                }

                if (!$err) {
                    foreach my $gs (@g) {
                        if ($gs->[1] ne "") {
                            client_write_opstatus ($fh, $gs->[0], $gs->[1]);
                        } else {
                            foreach $service (keys %{$watch{$gs->[0]}}) {
                                client_write_opstatus ($fh, $gs->[0], $service);
                            }
                        }
                    }
                    sock_write ($fh, "220 list opstatus completed\n");
                } else {
                    sock_write ($fh, "520 $group,$service does not exist\n");
                }
            }

        #
        # list disabled hosts and services
        #
        } elsif ($cmd eq "disabled") {
            foreach $group (keys %groups) {
                if (view_match($clients{$cl}->{"view"}, $group, undef)) {
                    # group members whose name starts with "*" are reported
                    @l = grep (/^\*/, @{$groups{$group}});
                    if (@l) {
                        s/^\*// for @l;
                        sock_write ($fh, "group $group: @l\n");
                    }
                }
            }
            foreach $group (keys %watch) {
                if (view_match($clients{$cl}->{"view"}, $group, undef)) {
                    if (exists $watch_disabled{$group} && $watch_disabled{$group} == 1) {
                        sock_write ($fh, "watch $group\n");
                    }
                }
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
                        if (defined $watch{$group}->{$service}->{'disable'}
                                && $watch{$group}->{$service}->{'disable'} == 1) {
                            sock_write ($fh, "watch $group service " .
                                "$service\n");
                        }
                    }
                }
            }
            sock_write ($fh, "220 list disabled completed\n");

        #
        # list last alert history
        #
        } elsif ($cmd eq "alerthist") {
            foreach my $entry (@last_alerts) {
                sock_write ($fh, esc_str ($entry) . "\n");
            }
            sock_write ($fh, "220 list alerthist completed\n");

        #
        # list time of last failures for each service
        #
        } elsif ($cmd eq "failures") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
                        my $sref = \%{$watch{$group}->{$service}};
                        client_write_opstatus ($fh, $group, $service)
                            if ($FAILURE{$sref->{"_op_status"}});
                    }
                }
            }
            sock_write ($fh, "220 list failures completed\n");

        #
        # list the failure history
        #
        } elsif ($cmd eq "failurehist") {
            foreach my $entry (@last_failures) {
                sock_write ($fh, esc_str ($entry) . "\n");
            }
            sock_write ($fh, "220 list failurehist completed\n");

        #
        # list the time of last successes for each service
        #
        } elsif ($cmd eq "successes") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
                        my $sref = \%{$watch{$group}->{$service}};
                        client_write_opstatus ($fh, $group, $service)
                            if ($SUCCESS{$sref->{"_op_status"}});
                    }
                }
            }
            sock_write ($fh, "220 list successes completed\n");

        #
        # list warnings
        #
        } elsif ($cmd eq "warnings") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
                        my $sref = \%{$watch{$group}->{$service}};
                        client_write_opstatus ($fh, $group, $service)
                            if ($WARNING{$sref->{"_op_status"}});
                    }
                }
            }
            sock_write ($fh, "220 list warnings completed\n");

        #
        # list process IDs
        #
        } elsif ($cmd eq "pids") {
            sock_write ($fh, "server $$\n");
            foreach $value (keys %runningpid) {
                ($group, $service) = split (/\//, $runningpid{$value});
                sock_write ($fh, "$group $service $value\n");
            }
            sock_write ($fh, "220 list pids completed\n");

        #
        # list watch groups and services
        #
        } elsif ($cmd eq "watch") {
            foreach $group (keys %watch) {
                foreach $service (keys %{$watch{$group}}) {
                    if (view_match($clients{$cl}->{"view"}, $group, $service)) {
                        if (!defined $watch{$group}->{$service}) {
                            sock_write ($fh, "$group (undefined service)\n");
                        } else {
                            sock_write ($fh, "$group $service\n");
                        }
                    }
                }
            }
            sock_write ($fh, "220 list watch completed\n");

        #
        # list server state
        #
        } elsif ($cmd eq "state") {
            if ($STOPPED) {
                sock_write ($fh, "scheduler stopped since $STOPPED_TIME\n");
            } else {
                sock_write ($fh, "scheduler running\n");
            }
            sock_write ($fh, "220 list state completed\n");

        #
        # list aliases
        #
        } elsif ($cmd eq "aliases") {
            my (@listAliasesRequest) = @argsList;

            # drop the "aliases" keyword itself
            shift (@listAliasesRequest);

            # if no alias request, all alias are responded
            unless (@listAliasesRequest) {
                @listAliasesRequest = keys (%alias);
            }

            foreach my $alias (@listAliasesRequest) {
                sock_write ($fh, "alias $alias\n");
                foreach $value (@{$alias{$alias}}) {
                    sock_write ($fh, "$value\n");
                }
                sock_write ($fh, "\n");
            }
            sock_write ($fh, "220 list aliases completed\n");

        #
        # list aliasgroups
        #
        } elsif ($cmd eq "aliasgroups") {
            my (@listAliasesRequest);
            @listAliasesRequest = keys (%alias);

            sock_write ($fh, "@listAliasesRequest\n")
                unless (@listAliasesRequest == 0);
            sock_write ($fh, "220 list aliasgroups completed\n");

        #
        # list deps
        #
        } elsif ($cmd eq "deps") {
            foreach my $g (keys %watch) {
                foreach my $s (keys %{$watch{$g}}) {
                    # the view filter is applied to the pair being listed
                    if (view_match($clients{$cl}->{"view"}, $g, $s)) {
                        my $sref = \%{$watch{$g}->{$s}};
                        if ($sref->{"depend"} ne "") {
                            sock_write ($fh, "exp $g $s '" .
                                esc_str ($sref->{"depend"}, 1) . "'\n");
                        } else {
                            sock_write ($fh, "exp $g $s 'NONE'\n");
                        }

                        # the group:service pairs named in the expression
                        my @u =
                            ($sref->{"depend"} =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g);
                        if (@u) {
                            sock_write ($fh, "cmp $g $s @u\n");
                        } else {
                            sock_write ($fh, "cmp $g $s NONE\n");
                        }
                    }
                }
            }

            sock_write ($fh, "220 list deps completed\n");

        #
        # downtime log
        #
        } elsif ($cmd eq "dtlog") {
            if ($CF{"DTLOGGING"}) {
                my $dtlog;
                if (!open ($dtlog, '<', $CF{"DTLOGFILE"})) {
                    sock_write ($fh, "520 list dtlog error, cannot open dtlog\n");

                } else {
                    # comment lines and blank lines are not sent
                    while (my $line = <$dtlog>) {
                        sock_write ($fh, $line)
                            if ($line !~ /^#/ && $line !~ /^\s*$/);
                    }

                    close ($dtlog);

                    sock_write ($fh, "220 list dtlog completed\n");
                }

            } else {
                sock_write ($fh, "520 list dtlog error, dtlogging is not turned on\n");
            }

        #
        # list available views
        #
        } elsif ($cmd eq "views") {
            sock_write ($fh, "views ".join(' ',sort(keys %views))."\n");
            sock_write ($fh, "220 list views completed\n");

        # unknown list command
        } else {
            sock_write ($fh, "520 unknown list command\n");
        }

    #
    # acknowledge a failure
    #
    } elsif ($cmd eq "ack" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        my ($group, $service, $comment) = split (/\s+/, $args, 3);

        #
        # exactly one reply is sent, and nothing is touched for a
        # group or service that does not exist
        #
        if (!defined ($watch{$group})) {
            sock_write ($fh, "520 unknown group\n");

        } elsif (!defined $watch{$group}->{$service}) {
            sock_write ($fh, "520 unknown service\n");

        } else {
            my $sref = \%{$watch{$group}->{$service}};

            if ($sref->{"_op_status"} == $STAT_OK ||
                    $sref->{"_op_status"} == $STAT_UNTESTED) {
                sock_write ($fh, "520 service is in a non-failure state\n");

            } else {
                $sref->{"_ack"} = time;
                $sref->{"_ack_comment"} = $clients{$cl}->{"user"} . ": " .
                    un_esc_str ((parse_line ('\s+', 0, $comment))[0]);
                sock_write ($fh, "220 ack completed\n");
                do_alert($group, $service, $sref->{"_ack_comment"}, undef, $FL_ACKALERT);
            }
        }

    #
    # disable watch, service or host
    #
    } elsif ($cmd eq "disable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        ($cmd, $args) = split (/\s+/, $args, 2);

        #
        # disable watch
        #
        if ($cmd eq "watch") {
            if (!defined (disen_watch($args, 0))) {
                sock_write ($fh, "520 disable error, unknown watch \"$args\"\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} disable watch $args") if ($CF{MONREMOTE});
                sock_write ($fh, "220 disable watch completed\n");
            }

        #
        # disable service
        #
        } elsif ($cmd eq "service") {
            ($group, $service) = split (/\s+/, $args, 2);

            if (!defined (disen_service ($group, $service, 0))) {
                sock_write ($fh, "520 disable error, unknown service\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} disable service $group $service") if ($CF{MONREMOTE});
                sock_write ($fh, "220 disable service completed\n");
                do_alert($group, $service, $clients{$cl}->{"user"}, undef, $FL_DISABLEALERT);
            }

        #
        # disable host
        #
        } elsif ($cmd eq "host") {
            my @notfound = ();

            my @hosts = split (/\s+/, $args);

            foreach my $h (@hosts) {
                if (!host_exists ($h)) {
                    push @notfound, $h;
                }
            }

            if (@notfound) {
                sock_write ($fh, "520 disable host failed, host(s) @notfound do not exist\n");

            } else {
                foreach my $h (@hosts) {
                    #
                    # disable a watch if there is a group with this host
                    # as its only member. this prevents warning messages
                    # about monitors not being run on empty host groups
                    #
                    foreach my $g (host_singleton_group($h)) {
                        disen_watch($g, 0);
                        mysystem("$CF{MONREMOTE} disable watch $g") if ($CF{MONREMOTE});
                    }

                    disen_host ($h, 0);
                    $stchanged++;
                    mysystem("$CF{MONREMOTE} disable host $h") if ($CF{MONREMOTE});
                }
                sock_write ($fh, "220 disable host completed\n");
            }

        } else {
            sock_write ($fh, "520 command could not be executed\n");
        }

    #
    # enable watch, service or host
    #
    } elsif ($cmd eq "enable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        ($cmd, $args) = split (/\s+/, $args, 2);

        #
        # enable watch
        #
        if ($cmd eq "watch") {
            if (!defined (disen_watch ($args, 1))) {
                sock_write ($fh, "520 enable error, unknown watch\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} enable watch $args") if ($CF{MONREMOTE});
                sock_write ($fh, "220 enable watch completed\n");
            }

        #
        # enable service
        #
        } elsif ($cmd eq "service") {
            ($group, $service) = split (/\s+/, $args, 2);

            if (!defined (disen_service ($group, $service, 1))) {
                sock_write ($fh, "520 enable error, unknown group\n");
            } else {
                $stchanged++;
                mysystem("$CF{MONREMOTE} enable service $group $service") if ($CF{MONREMOTE});
                sock_write ($fh, "220 enable completed\n");
            }

        #
        # enable host
        #
        } elsif ($cmd eq "host") {
            #
            # NOTE(review): unlike "disable host", the host names are not
            # checked with host_exists before being placed in the
            # MONREMOTE command string. If mysystem runs that string
            # through a shell, this is unvalidated client text -- confirm.
            #
            foreach my $h (split (/\s+/, $args)) {
                foreach my $g (host_singleton_group($h)) {
                    disen_watch($g, 1);
                    mysystem("$CF{MONREMOTE} enable watch $g") if ($CF{MONREMOTE});
                }

                disen_host ($h, 1);
                mysystem("$CF{MONREMOTE} enable host $h") if ($CF{MONREMOTE});
                $stchanged++;
            }
            sock_write ($fh, "220 enable completed\n");

        } else {
            sock_write ($fh, "520 command could not be executed\n");
        }

    #
    # server time
    #
    } elsif ($cmd eq "servertime" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
        sock_write ($fh, join ("", time, " ", scalar (localtime), "\n"));
        sock_write ($fh, "220 servertime completed\n");

    #
    # check auth
    #
    } elsif ($cmd eq "checkauth") {
        # only the first word of the arguments is the command to check
        my ($checkcmd) = split (' ', $args);

        # Note that we call check_auth without syslogging here.
        if (check_auth ($clients{$cl}->{"user"}, $checkcmd, 1)) {
            sock_write ($fh, "220 command authorized\n");
        } else {
            sock_write ($fh, "520 command could not be executed\n");
        }

    } else {
        sock_write ($fh, "520 command could not be executed, unknown command\n");
    }

    save_state ("disabled") if ($stchanged);
    syslog ('info', '%s', "finished client command \"$l\"")
        if ($l !~ /^\s*login/i);
}
|
3007 |
|
|
|
3008 |
|
|
|
3009 |
|
|
sub client_write_opstatus {
    #
    # Send one operational-status line for $group/$service to the
    # client on $fh (through sock_write). The line is a list of
    # space-separated key=value pairs terminated by a newline.
    #
    #   $fh       handle passed straight to sock_write
    #   $group    group name, used as a key into %watch
    #   $service  service name within that group
    #
    my $fh = shift;
    my ($group, $service) = @_;

    my $sref = \%{$watch{$group}->{$service}};
    my $summary = esc_str ($sref->{"_last_summary"}, 1);
    my $detail = esc_str ($sref->{"_last_detail"}, 1);
    my $depend = esc_str ($sref->{"depend"}, 1);
    my $hostdepend = esc_str ($sref->{"hostdepend"}, 1);
    my $monitordepend = esc_str ($sref->{"monitordepend"}, 1);
    my $alertdepend = esc_str ($sref->{"alertdepend"}, 1);
    my $monitor = esc_str ($sref->{"monitor"}, 1);

    # the ack comment is only reported while _ack is non-zero
    my $comment = '';
    if ($sref->{"_ack"} != 0) {
        $comment = esc_str ($sref->{"_ack_comment"}, 1);
    }

    #
    # total the alerts sent over all periods (skipping periods that
    # have "alerts_dont_count" defined) and find the newest _last_alert
    #
    my $alerts_sent = 0;
    my $last_alert = 0;
    foreach my $period (keys %{$sref->{"periods"}})
    {
        my $pref = $sref->{"periods"}->{$period};

        $alerts_sent += $pref->{"_alert_sent"}
            if (!defined ($pref->{"alerts_dont_count"}));

        $last_alert = $pref->{"_last_alert"}
            if (defined $pref->{"_last_alert"} && $pref->{"_last_alert"} > $last_alert);
    }

    # undefined values are reported as an empty string
    my $or_empty = sub { defined $_[0] ? $_[0] : "" };

    #
    # Every value is passed as a %s argument. Interpolating them into
    # the format itself would let a "%" inside a group name, monitor
    # command line or ack comment be taken as a conversion.
    #
    my $buf = sprintf (
        "group=%s service=%s opstatus=%s last_opstatus=%s exitval=%s" .
        " timer=%s last_success=%s last_trap=%s last_traphost=%s" .
        " last_check=%s ack=%s ackcomment=%s alerts_sent=%s depstatus=%s" .
        " depend=%s hostdepend=%s monitordepend=%s alertdepend=%s" .
        " monitor=%s last_summary=%s last_detail=%s",
        $group,
        $service,
        $sref->{"_op_status"},
        $or_empty->($sref->{"_last_op_status"}),
        $or_empty->($sref->{"_exitval"}),
        $or_empty->($sref->{"_timer"}),
        $or_empty->($sref->{"_last_success"}),
        $or_empty->($sref->{"_last_trap"}),
        $or_empty->($sref->{"_last_traphost"}),
        $or_empty->($sref->{"_last_check"}),
        $or_empty->($sref->{"_ack"}),
        $comment,
        $alerts_sent,
        (defined $sref->{"_depend_status"} ? int ($sref->{"_depend_status"}) : ""),
        $depend,
        $hostdepend,
        $monitordepend,
        $alertdepend,
        $monitor,
        $summary,
        $detail);

    #
    # optional fields, only present when they carry a value
    #
    $buf .= " last_failure=$sref->{_last_failure}"
        if ($sref->{"_last_failure"});

    if ($sref->{"interval"})
    {
        $buf .= " interval=$sref->{interval}" .
            " monitor_duration=$sref->{_monitor_duration}" .
            " monitor_running=$sref->{_monitor_running}";
    }

    $buf .= " exclude_period=" . esc_str ($sref->{"exclude_period"})
        if ($sref->{"exclude_period"} ne "");

    $buf .= " exclude_hosts=" .
        esc_str (join (" ", keys %{$sref->{"exclude_hosts"}}))
        if (keys %{$sref->{"exclude_hosts"}});

    $buf .= " randskew=$sref->{randskew}"
        if ($sref->{"randskew"});

    $buf .= " last_alert=$last_alert"
        if ($last_alert);

    if ($sref->{"_first_failure"})
    {
        # how long the service has been failing, in seconds
        my $t = time - $sref->{"_first_failure"};

        $buf .= " first_failure=$sref->{_first_failure}" .
            " failure_duration=$t";
    }

#    if ($sref->{"_first_success"})
#    {
#        my $t = time - $sref->{"_first_success"};
#
#        $buf .= " first_success=$sref->{_first_success}" .
#            " success_duration=$t";
#    }

    $buf .= "\n";

    sock_write ($fh, $buf);
}
|
3085 |
|
|
|
3086 |
|
|
|
3087 |
|
|
#
|
3088 |
|
|
# show usage
|
3089 |
|
|
#
|
3090 |
|
|
sub usage {
    #
    # Print the command-line synopsis and the option summary to the
    # currently selected output handle. $AUTHOR and $RCSID are
    # interpolated into the text. Takes no arguments.
    #
    # The synopsis lists every option described below it, including
    # -O, and names the -t argument "port" as the option list does.
    #
    print <<"EOF";
usage: mon [-a dir] [-A file] [-b dir] [-B dir] [-c config] [-d]
           [-D dir] [-f] [-h] [-i secs] [-k num] [-l [type]] [-L dir]
           [-M [path]] [-m num] [-O facility] [-p num] [-P file]
           [-r num] [-s dir] [-S] [-t port]
       mon -v

  -a dir        alert script dir
  -A file       authorization file
  -b dir        base directory for alerts and monitors (basedir)
  -B dir        base directory for configuration files (cfbasedir)
  -c config     config file, defaults to "mon.cf"
  -d            debug
  -D dir        state directory (statedir)
  -f            fork and become a daemon
  -h            this help
  -i secs       sleep interval (seconds), defaults to 1
  -k num        keep history of last num events
  -l [type]     load some types of old state from statedir. type can
                be disabled (default), opstatus or all.
  -L dir        log directory (logdir)
  -M [path]     pre-process config file with m4. if m4 isn't in \$PATH
                specify the path to m4 here
  -m num        throttle at maximum number of monitor processes
  -O facility   syslog facility to use
  -p num        server listens on port num
  -P file       PID file
  -r num        randomize startup schedule
  -s dir        monitor script dir
  -S            start with scheduler stopped
  -t port       trap port
  -v            print version

Report bugs to $AUTHOR
$RCSID
EOF
}
|
3128 |
|
|
|
3129 |
|
|
|
3130 |
|
|
#
|
3131 |
|
|
# become a daemon
|
3132 |
|
|
#
|
3133 |
|
|
sub daemon {
    #
    # Detach from the invoking process: fork, start a new session,
    # fork again, then point STDIN at /dev/null and STDOUT/STDERR at
    # the file named by $CF{"MONERRFILE"}.
    #
    # Takes no arguments. Returns only in the surviving child; both
    # intermediate parents exit(0). Any failure is reported (die
    # before the first fork succeeds, syslog afterwards) and ends the
    # process with status 1.
    #
    my $pid;

    if ($pid = fork()) {
        # the parent goes away all happy and stuff
        exit (0);
    } elsif (!defined $pid) {
        die "could not fork: $!\n";
    }

    setsid();

    #
    # make it so that we cannot regain a controlling terminal
    #
    if ($pid = fork()) {
        # the parent goes away all happy and stuff
        exit (0);
    } elsif (!defined $pid) {
        # pass the text as a %s argument so it is never parsed as a format
        syslog ('err', '%s', "could not fork: $!");
        exit 1;
    }

#    chdir ('/');
    umask (022);

    #
    # Three-argument open: the configured path is used exactly as
    # given, without two-argument open's whitespace stripping and
    # mode parsing.
    #
    if (!open (N, '+>>', $CF{"MONERRFILE"}))
    {
        syslog ("err", '%s', "could not open error output file $CF{'MONERRFILE'}: $!");
        exit (1);
    }

    # unbuffer the error file, then restore STDOUT as the selected handle
    select (N);
    $| = 1;
    select (STDOUT);

    if (!open (STDIN, '<', "/dev/null"))
    {
        syslog ("err", '%s', "could not open STDIN from /dev/null: $!");
        exit (1);
    }

    print N "Mon starting at ".localtime(time)."\n";

    # both standard output streams become duplicates of the error file
    if (!open (STDOUT, ">&N") ||
        !open (STDERR, ">&N")) {
        syslog ("err", '%s', "could not redirect: $!");
        exit (1);
    }

    syslog ('info', "running as daemon");
}
|
3183 |
|
|
|
3184 |
|
|
|
3185 |
|
|
#
|
3186 |
|
|
# debug
|
3187 |
|
|
#
|
3188 |
|
|
sub debug {
    #
    # Emit a debugging message when debugging is enabled at $level
    # or higher.
    #
    #   $level  verbosity of this message; dropped when $opt{"d"} is
    #           undefined or smaller than $level
    #   @l      message fragments, concatenated without a separator
    #
    # With $opt{"d"} true and $opt{"f"} false the message is printed
    # on STDERR, otherwise it is sent to syslog at priority "debug".
    #
    my ($level, @l) = @_;

    return if (!defined $opt{"d"} || $level > $opt{"d"});

    if ($opt{"d"} && !$opt{"f"}) {
        print STDERR @l;
    } else {
        #
        # The text is passed as a %s argument: used as the format it
        # would have "%m" and other "%" sequences interpreted. One
        # trailing newline is removed because syslog adds its own to
        # the "%s" format.
        #
        my $msg = join ('', @l);
        $msg =~ s/\n\z//;
        syslog ('debug', '%s', $msg);
    }
}
|
3199 |
|
|
|
3200 |
|
|
|
3201 |
|
|
#
|
3202 |
|
|
# die_die
|
3203 |
|
|
#
|
3204 |
|
|
sub die_die {
    #
    # Terminate the program with a fatal error.
    #
    #   $level  syslog priority for the message
    #   $msg    description of the error
    #
    # When $opt{"d"} is true the error is raised with die, prefixed
    # by "[$level]". Otherwise it is logged as "fatal, $msg", the
    # syslog connection is closed and the process exits with status 1.
    # Never returns.
    #
    my ($level, $msg) = @_;

    die "[$level] $msg\n" if ($opt{"d"});

    #
    # Logged through a "%s" format so a "%" inside $msg is not taken
    # as a conversion. One trailing newline is dropped because syslog
    # appends its own to the format.
    #
    my $text = "fatal, $msg";
    $text =~ s/\n\z//;
    syslog ($level, '%s', $text);

    closelog();
    exit (1);
}
|
3213 |
|
|
|
3214 |
|
|
|
3215 |
|
|
#
|
3216 |
|
|
# handle cleanup of exited processes
|
3217 |
|
|
# trigger alerts on failures (or send no alert if disabled)
|
3218 |
|
|
# do some accounting
|
3219 |
|
|
#
|
3220 |
|
|
sub proc_cleanup {
    #
    # Reap every exited child without blocking. For each reaped PID
    # recorded in %runningpid: drain what is left on its pipe into
    # %ibufs, record the monitor duration, hand the exit status and
    # the collected output to process_event, reset the service timer
    # and drop the process with remove_proc.
    #
    # Takes no arguments; does nothing when %running is empty.
    #
    # NOTE(review): the exit value passed on is $? >> 8, which is 0
    # for a child that was killed by a signal.
    #
    my $now = time;
    my $chunk;

    return if (keys %running == 0);

    my $pid;
    while (($pid = waitpid (-1, WNOHANG())) > 0)
    {
        # children we did not start as monitors are reaped and ignored
        next if (!exists $runningpid{$pid});

        # %runningpid maps a PID to its "group/service" key
        my $key = $runningpid{$pid};
        my ($group, $service) = split (/\//, $key);
        my $sref = \%{$watch{$group}->{$service}};

        #
        # suck in any extra data
        #
        my $fh = $fhandles{$key};
        while (my $nread = sysread ($fh, $chunk, 8192))
        {
            $ibufs{$key} .= $chunk;
        }

        debug (1, "PID $pid ($key) exited with [" . int ($?>>8) . "]\n");

        $sref->{"_monitor_duration"} = $now - $sref->{"_last_check"};
        $sref->{"_monitor_running"} = 0;

        process_event ("m", $group, $service, int ($?>>8), $ibufs{$key});

        reset_timer ($group, $service);

        remove_proc ($pid);
    }
}
|
3254 |
|
|
|
3255 |
|
|
|
3256 |
|
|
#
|
3257 |
|
|
# handle the event where a monitor exits or a trap is received
|
3258 |
|
|
#
|
3259 |
|
|
# $type is "m" for monitor, "t" for trap
|
3260 |
|
|
#
|
3261 |
|
|
sub process_event {
    #
    # Handle the event where a monitor exits or a trap is received.
    #
    #   $type     "m" for a monitor, "t" for a trap, "T" for the case
    #             that is alerted with $FL_TRAPTIMEOUT
    #   $group    group name, used as a key into %watch
    #   $service  service name within that group
    #   $exitval  exit value; any true value is treated as a failure
    #   $output   text reported by the monitor or trap; the first line
    #             is the summary, the remainder is the detail
    #
    # Updates the per-service bookkeeping in %watch, records failures
    # in @last_failures and calls do_alert for failures and upalerts.
    #
    my ($type, $group, $service, $exitval, $output) = @_;

    debug (1, "process_event type=$type group=$group service=$service exitval=$exitval output=[$output]\n");

    my $sref = \%{$watch{$group}->{$service}};
    my $tmnow = time;

    my ($summary, $detail) = split ("\n", $output, 2);

    $sref->{"_exitval"} = $exitval;

    if ($sref->{"depend"} ne "" &&
        $sref->{"dep_behavior"} eq "a")
    {
        dep_ok ($sref, 'a');
    }

    #
    # error exit value
    #
    if ($exitval)
    {
        #
        # accounting
        #
        $sref->{"_failure_count"}++;
        $sref->{"_consec_failures"}++;
        $sref->{"_last_failure"} = $tmnow;

        # _first_failure is only stamped on the transition into failure
        if ($sref->{"_op_status"} == $STAT_OK ||
            $sref->{"_op_status"} == $STAT_UNKNOWN ||
            $sref->{"_op_status"} == $STAT_UNTESTED)
        {
            $sref->{"_first_failure"} = $tmnow;
        }
        set_op_status ($group, $service, $STAT_FAIL);

        $summary = "(NO SUMMARY)" if ($summary =~ /^\s*$/m);
        $sref->{"_last_summary"} = $summary;
        $sref->{"_last_detail"} = $detail;

        # NOTE(review): $tm is not set in this sub, unlike $tmnow;
        # confirm it is the intended timestamp.
        shift @last_failures if (@last_failures > $CF{"MAX_KEEP"});
        push @last_failures, "$group $service" .
            " $tm $summary";

        # the summary is monitor output, so log it through "%s"
        # rather than using it as the syslog format
        syslog ('crit', '%s', "failure for $last_failures[-1]");

        #
        # send an alert if necessary
        #
        if ($type eq "m")
        {
            do_alert ($group, $service, $output, $exitval, $FL_MONITOR);

            #
            # change interval if needed
            #
            if (defined ($sref->{"failure_interval"}) &&
                !defined $sref->{"_old_interval"})
            {
                $sref->{"_old_interval"} = $sref->{"interval"};
                $sref->{"interval"} = $sref->{"failure_interval"};
                $sref->{"_next_check"} = 0;
            }
        }

        elsif ($type eq "t")
        {
            do_alert ($group, $service, $output, $exitval, $FL_TRAP);
        }

        elsif ($type eq "T")
        {
            do_alert ($group, $service, $output, $exitval, $FL_TRAPTIMEOUT);
        }

        $sref->{"_failure_output"} = $output;
    }

    #
    # success exit value
    #
    else
    {
        if ($CF{"DTLOGGING"} && defined ($sref->{"_op_status"}) &&
            $sref->{"_op_status"} == $STAT_FAIL)
        {
            write_dtlog ($sref, $group, $service);
        }

        my $old_status = $sref->{"_op_status"};
        set_op_status ($group, $service, $STAT_OK);

        if ($type eq "t")
        {
            $sref->{"_last_uptrap"} = $tmnow;
        }

        #
        # if this service has just come back up and
        # we are paying attention to this event,
        # let someone know
        #
        if (($sref->{"redistribute"} ne '') ||
            ((defined ($sref->{"_op_status"})) &&
             ($old_status == $STAT_FAIL) &&
             (defined ($sref->{"_upalert"})) &&
             (!defined ($sref->{"upalertafter"})
              || (($tmnow - $sref->{"_first_failure"}) >= $sref->{"upalertafter"}))))
        {
            # Save the last failing monitor's output for posterity
            $sref->{"_upalertoutput"} = $sref->{"_last_output"};
            do_alert ($group, $service, $sref->{"_upalertoutput"}, 0, $FL_UPALERT);
        }

        #
        # send also when no upalertafter set
        # cabo: Modified to always send
        #
        #elsif (defined($sref->{"_upalert"}) && $old_status == $STAT_FAIL)
        elsif (defined ($sref->{"_upalert"}) && ($old_status == $STAT_FAIL || $old_status == $STAT_UNTESTED))
        {
            do_alert ($group, $service, $sref->{"_upalertoutput"}, 0, $FL_UPALERT);
        }

        $sref->{"_ack"} = 0;
        $sref->{"_ack_comment"} = '';
        $sref->{"_first_failure"} = 0;
        $sref->{"_last_failure"} = 0;
        $sref->{"_consec_failures"} = 0;
        $sref->{"_failure_output"} = "";
        $sref->{"_last_summary"} = $summary;
        $sref->{"_last_detail"} = $detail;

        #
        # reset the alertevery timer
        #
        foreach my $period (keys %{$sref->{"periods"}})
        {
            #
            # "alertevery strict" should not reset _last_alert
            #
            if (!$sref->{"periods"}->{$period}->{"_alertevery_strict"})
            {
                $sref->{"periods"}->{$period}->{"_last_alert"} = 0;
            }

            $sref->{"periods"}->{$period}->{"_1stfailtime"} = 0;
            $sref->{"periods"}->{$period}->{"_alert_sent"} = 0;
        }

        #
        # change interval back to original
        #
        # "defined" is the intended test: the failure branch stores
        # the normal interval in _old_interval and this branch clears
        # it to undef. The former "!= undef" was a numeric comparison
        # with 0.
        #
        if (defined ($sref->{"failure_interval"}) &&
            defined ($sref->{"_old_interval"}))
        {
            $sref->{"interval"} = $sref->{"_old_interval"};
            $sref->{"_old_interval"} = undef;
            $sref->{"_next_check"} = 0;
        }

        $sref->{"_last_success"} = $tmnow;
    }

    #
    # save the output
    #
    $sref->{"_last_output"} = $output;
    $sref->{"_last_summary"} = $summary;
    $sref->{"_last_detail"} = $detail;
}
|
3431 |
|
|
|
3432 |
|
|
|
3433 |
|
|
#
|
3434 |
|
|
# collect output from running processes
|
3435 |
|
|
#
|
3436 |
|
|
sub collect_output {
    #
    # Collect output from running processes: poll the handles in
    # %fhandles with a zero-timeout select and append whatever can be
    # read from the ready ones to the matching %ibufs entry.
    #
    # Takes no arguments; does nothing when %running is empty. A
    # handle that reports EOF or a read error other than EAGAIN has
    # its bit cleared in $fdset_rbits.
    #
    my ($buf, $rout);

    return if (!keys %running);

    my $nfound = select ($rout = $fdset_rbits, undef, undef, 0);

    # capture errno before debug() can overwrite it
    my $select_errno = $! + 0;
    debug (1, "select returned $nfound file handles\n");

    #
    # $! only describes select when select itself failed; a stale
    # EINTR left by some earlier call must not stop us from reading
    #
    return if ($nfound == -1 && $select_errno == EINTR());
    return if (!$nfound);

    #
    # look for the file descriptors that are readable,
    # and try to read as much as possible from them
    #
    foreach my $k (keys %fhandles) {
        my $fh = $fhandles{$k};
        next if (vec ($rout, fileno ($fh), 1) != 1);

        my $z;
        while ($z = sysread ($fh, $buf, 8192)) {
            $ibufs{$k} .= $buf;
            debug (1, "[$buf] from $fh\n");
        }

        # errno of the sysread that ended the loop, saved before any
        # further call can change it
        my $read_errno = $! + 0;
        my $read_error = "$!";

        if (!defined ($z)) {
            #
            # ignore if EAGAIN, since we're nonblocking
            #
            next if ($read_errno == EAGAIN());

            #
            # error on this descriptor
            #
            debug (1, "error on $fh: $read_error\n");
            syslog ('err', '%s', "error on $fh: $read_error");
            vec ($fdset_rbits, fileno ($fh), 1) = 0;
        } else {
            #
            # sysread returned 0, which always means EOF (errno is
            # not set in that case): stop trying to get input from
            # this file descriptor
            #
            debug (1, "EOF on $fh\n");
            vec ($fdset_rbits, fileno ($fh), 1) = 0;
        }
    }
}
|
3488 |
|
|
|
3489 |
|
|
|
3490 |
|
|
|
3491 |
|
|
|
3492 |
|
|
#
|
3493 |
|
|
# handle forking a monitor process, and set up variables
|
3494 |
|
|
#
|
3495 |
|
|
sub run_monitor {
    #
    # Handle forking a monitor process for $group/$service, and set
    # up the variables used to track it.
    #
    #   $group    group name, used as a key into %watch and %groups
    #   $service  service name within that group
    #
    # Returns undef when no monitor program is found in %MONITORHASH
    # and 0 when the fork fails. Otherwise the child execs the
    # monitor with its output on a pipe, and the parent records the
    # pipe and PID in %fhandles, %runningpid, %running and %ibufs and
    # advances _next_check by the service interval.
    #
    my ($group, $service) = @_;
    my (@args, $pid, @ghosts, $monitor, $monitorargs);

    my $sref = \%{$watch{$group}->{$service}};

    # first word is the monitor name, $monitorargs is the rest
    # including its leading whitespace
    ($monitor, $monitorargs) = ($sref->{"monitor"} =~ /^(\S+)(\s+(.*))?$/);

    if (!defined $MONITORHASH{$monitor} || ! -f $MONITORHASH{$monitor}) {
        # logged through "%s" so the name is never parsed as a format
        syslog ('err', '%s', "no monitor found while trying to run [$monitor]");
        return undef;
    } else {
        $monitor = $MONITORHASH{$monitor};
    }

    $monitor .= " " . $monitorargs if ($monitorargs);

    @ghosts = ();

    #
    # if monitor ends with ";;", do not append groups
    # to command line
    #
    if ($monitor =~ /;;\s*$/) {
        $monitor =~ s/\s*;;\s*$//;
        @args = quotewords ('\s+', 0, $monitor);

        # placeholder so the empty-group check below does not trigger
        @ghosts = (1);

    #
    # exclude disabled hosts
    #
    } else {
        @ghosts = grep (!/^\*/, @{$groups{$group}});

        #
        # per-service excludes
        #
        if (keys %{$sref->{"exclude_hosts"}})
        {
            @ghosts = grep { !$sref->{"exclude_hosts"}->{$_} } @ghosts;
        }

        #
        # per-host dependencies
        #
        if ((defined $sref->{"depend"} && $sref->{"depend"} ne "" && $sref->{"dep_behavior"} eq 'hm')
            || (defined $sref->{"hostdepend"} && $sref->{"hostdepend"} ne ""))
        {
            my $sum = dep_summary ($sref);

            # drop every host that occurs in an entry of the
            # dependency summary
            @ghosts = grep {
                my $host = $_;
                !grep (/\Q$host\E/, @$sum);
            } @ghosts;
        }

        @args = (quotewords ('\s+', 0, $monitor), @ghosts);
    }

    if (@ghosts == 0 && !defined ($sref->{"allow_empty_group"}))
    {
        # the former trailing "\n" is dropped because syslog appends
        # one to the "%s" format
        syslog ('err', '%s', "monitor for $group/$service" .
            " not called because of no host arguments");
        reset_timer ($group, $service);
    }

    else
    {
        $fhandles{"$group/$service"} = FileHandle->new;

        # implicit fork: the child's STDOUT is the pipe read here
        $pid = open ($fhandles{"$group/$service"}, '-|');

        if (!defined $pid)
        {
            syslog ('err', '%s', "Could not fork: $!");
            delete $fhandles{"$group/$service"};
            return 0;
        }

        elsif ($pid == 0)
        {
            #
            # child: merge stderr into the pipe, detach stdin, export
            # the service state and exec the monitor
            #
            open (STDERR, '>&STDOUT')
                or syslog ('err', '%s', "Could not dup stderr: $!");

            open (STDIN, "</dev/null")
                or syslog ('err', '%s', "Could not connect stdin to /dev/null: $!");

            foreach my $v (keys %{$sref->{"ENV"}})
            {
                $ENV{$v} = $sref->{"ENV"}->{$v};
            }
            $ENV{"MON_GROUP"} = $group;
            $ENV{"MON_SERVICE"} = $service;
            $ENV{"MON_LAST_SUMMARY"} = $sref->{"_last_summary"} if (defined $sref->{"_last_summary"});
            $ENV{"MON_LAST_OUTPUT"} = $sref->{"_last_output"} if (defined $sref->{"_last_output"});
            $ENV{"MON_LAST_FAILURE"} = $sref->{"_last_failure"} if (defined $sref->{"_last_failure"});
            $ENV{"MON_FIRST_FAILURE"} = $sref->{"_first_failure"} if (defined $sref->{"_first_failure"});
            $ENV{"MON_DEPEND_STATUS"} = $sref->{"_depend_status"} if (defined $sref->{"_depend_status"});
            $ENV{"MON_FIRST_SUCCESS"} = $sref->{"_first_success"} if (defined $sref->{"_first_success"});
            $ENV{"MON_LAST_SUCCESS"} = $sref->{"_last_success"} if (defined $sref->{"_last_success"});
            $ENV{"MON_DESCRIPTION"} = $sref->{"description"} if (defined $sref->{"description"});
            $ENV{"MON_STATEDIR"} = $CF{"STATEDIR"};
            $ENV{"MON_LOGDIR"} = $CF{"LOGDIR"};
            $ENV{"MON_CFBASEDIR"} = $CF{"CFBASEDIR"};

            # NOTE(review): exit() in the forked child runs any END
            # blocks the daemon has; confirm that is acceptable.
            if (!exec @args)
            {
                syslog ('err', '%s', "could not exec '@args': $!");
                exit (1);
            }
        }

        $sref->{"_last_check"} = scalar (time);
        $sref->{"_monitor_running"} = 1;

        debug (1, "watching file handle ", fileno ($fhandles{"$group/$service"}),
            " for $group/$service\n");

        #
        # set nonblocking I/O and setup bit vector for select(2)
        #
        configure_filehandle ($fhandles{"$group/$service"}) ||
            syslog ("err", '%s', "could not configure filehandle for $group/$service: $!");
        vec ($fdset_rbits,
            fileno ($fhandles{"$group/$service"}), 1) = 1;
        $fdset_ebits |= $fdset_rbits;

        #
        # note that this is running
        #
        $running{"$group/$service"} = 1;
        $runningpid{$pid} = "$group/$service";
        $ibufs{"$group/$service"} = "";
        $procs++;
    }

    # schedule the next run relative to the previous schedule when
    # there is one, otherwise relative to now
    if ($sref->{"_next_check"})
    {
        $sref->{"_next_check"} += $sref->{"interval"};
    } else {
        $sref->{"_next_check"} = time() + $sref->{"interval"};
    }
}
|
3656 |
|
|
|
3657 |
|
|
|
3658 |
|
|
#
|
3659 |
|
|
# set the countdown timer for this service
|
3660 |
|
|
#
|
3661 |
|
|
sub reset_timer {
    #
    # Set the countdown timer (_timer) for $group/$service.
    #
    #   randskew non-zero  interval plus or minus a random whole
    #                      number between 1 and randskew
    #   _next_check set    time remaining until _next_check, or the
    #                      full interval when that moment has passed
    #   otherwise          the full interval
    #
    my ($group, $service) = @_;

    my $sref = \%{$watch{$group}->{$service}};
    my $timer;

    if ($sref->{"randskew"} != 0)
    {
        # pick the direction first, then the size of the skew
        my $negative = (int (rand (2)) == 0);
        my $skew = int (rand ($sref->{"randskew"}) + 1);

        $timer = $sref->{"interval"} + ($negative ? -$skew : $skew);
    }

    elsif ($sref->{"_next_check"})
    {
        $timer = $sref->{"_next_check"} - time();
        $timer = $sref->{"interval"} if ($timer < 0);
    }

    else
    {
        $timer = $sref->{"interval"};
    }

    $sref->{"_timer"} = $timer;
}
|
3686 |
|
|
|
3687 |
|
|
|
3688 |
|
|
#
|
3689 |
|
|
# randomize the delay before each test
|
3690 |
|
|
# $opt{"randstart"} is seconds
|
3691 |
|
|
#
|
3692 |
|
|
sub randomize_startdelay {
    #
    # Give every service in %watch a random initial countdown: a
    # whole number of seconds from 0 up to, but not including,
    # $CF{"RANDSTART"}. Takes no arguments.
    #
    for my $group (keys %watch) {
        for my $service (keys %{$watch{$group}}) {
            my $delay = int (rand ($CF{"RANDSTART"}));

            $watch{$group}->{$service}->{"_timer"} = $delay;
        }
    }
}
|
3703 |
|
|
|
3704 |
|
|
|
3705 |
|
|
#
|
3706 |
|
|
# return 1 if $val is within $range,
|
3707 |
|
|
# where $range = "number" or "number-number"
|
3708 |
|
|
#
|
3709 |
|
|
sub inRange {
    #
    # Return 1 if $val is within $range, 0 otherwise.
    #
    #   $val    number to test
    #   $range  either "number" or "number-number" (whitespace is
    #           allowed around the dash); both ends are inclusive.
    #           Anything else never matches.
    #
    my ($val, $range) = @_;

    # a single number must match exactly
    if ($range =~ /^(\d+)$/) {
        return 1 if ($val == $1);
    }

    # low-high, inclusive on both ends
    if ($range =~ /^(\d+)\s*-\s*(\d+)$/) {
        return 1 if ($val >= $1 && $val <= $2);
    }

    return 0;
}
|
3724 |
|
|
|
3725 |
|
|
|
3726 |
|
|
#
|
3727 |
|
|
# disable ($cmd==0) or enable a watch
|
3728 |
|
|
#
|
3729 |
|
|
sub disen_watch {
    #
    # Disable or enable a watch.
    #
    #   $w    watch name, looked up in %watch
    #   $cmd  false to disable, true to enable
    #
    # Returns undef when %watch has no defined entry for $w;
    # otherwise records the state in %watch_disabled and returns it
    # (1 = disabled, 0 = enabled).
    #
    my ($w, $cmd) = @_;

    return undef if (!defined ($watch{$w}));

    $watch_disabled{$w} = ($cmd ? 0 : 1);
}
|
3739 |
|
|
|
3740 |
|
|
|
3741 |
|
|
#
|
3742 |
|
|
# disable ($cmd==0) or enable a service
|
3743 |
|
|
#
|
3744 |
|
|
sub disen_service {
    #
    # Disable or enable a single service.
    #
    #   $g    group name, looked up in %watch
    #   $s    service name within that group
    #   $cmd  false to disable, true to enable
    #
    # Returns undef when the group or the service is not defined in
    # %watch; otherwise stores the state in the service's "disable"
    # entry and returns it (1 = disabled, 0 = enabled).
    #
    my ($g, $s, $cmd) = @_;

    return undef
        if (!defined $watch{$g} || !defined $watch{$g}->{$s});

    $watch{$g}->{$s}->{"disable"} = ($cmd ? 0 : 1);
}
|
3756 |
|
|
|
3757 |
|
|
|
3758 |
|
|
#
|
3759 |
|
|
# disable ($cmd==0) or enable a host
|
3760 |
|
|
#
|
3761 |
|
|
sub disen_host {
    #
    # Disable or enable a host in every group that contains it. A
    # disabled host is stored in %groups with a leading "*".
    #
    #   $h    host name, matched literally against group members
    #   $cmd  undefined or 0 to disable, anything else to enable
    #
    # Returns 1 if at least one group member was changed, undef
    # otherwise.
    #
    my ($h, $cmd) = @_;

    my $found = undef;
    my $disable = (!defined $cmd) || $cmd == 0;

    foreach my $g (keys %groups) {
        foreach my $member (@{$groups{$g}}) {
            #
            # \Q...\E makes the name match literally: unquoted, the
            # dots in a host name match any character, and a name
            # holding a regex metacharacter can make the pattern
            # fail to compile
            #
            if ($disable) {
                $found = 1 if ($member =~ s/^\Q$h\E$/*$h/);
            } else {
                $found = 1 if ($member =~ s/^\*\Q$h\E$/$h/);
            }
        }
    }

    $found;
}
|
3784 |
|
|
|
3785 |
|
|
|
3786 |
|
|
sub host_exists {
    #
    # Return 1 if $host is a member of at least one group in %groups,
    # 0 otherwise. Members carrying the "*" disabled marker do not
    # count as a match.
    #
    #   $host  host name, compared literally
    #
    my $host = shift;

    foreach my $g (keys %groups) {
        # \Q...\E: match the name itself, not the regex it would
        # otherwise be read as (dots, brackets, parentheses ...)
        return 1 if (grep (/^\Q$host\E$/, @{$groups{$g}}));
    }

    return 0;
}
|
3801 |
|
|
|
3802 |
|
|
|
3803 |
|
|
|
3804 |
|
|
#
|
3805 |
|
|
# given a host, search groups and return an array of group
|
3806 |
|
|
# names which have that host as their only member. return
|
3807 |
|
|
# an empty array if no group found
|
3808 |
|
|
#
|
3809 |
|
|
#
|
3810 |
|
|
sub host_singleton_group {
    #
    # Given a host, search %groups and return the names of the groups
    # which have that host as their only member, whether or not it
    # carries the "*" disabled marker. Returns an empty list if no
    # such group is found.
    #
    #   $host  host name, compared literally
    #
    my $host = shift;

    my @found;

    foreach my $g (keys %groups) {
        my @members = @{$groups{$g}};

        # \Q...\E keeps dots and other metacharacters in the host
        # name from being interpreted as regex syntax
        push (@found, $g)
            if (@members == 1 && $members[0] =~ /^\*?\Q$host\E$/);
    }

    return (@found);
}
|
3825 |
|
|
|
3826 |
|
|
|
3827 |
|
|
#
# save_state (@states)
#
# Write runtime state into files under $CF{STATEDIR}. Each element of
# @states is one of:
#
#   "disabled"  write the "disabled" file: one "disable host",
#               "disable watch" or "disable service" line per
#               disabled item
#   "opstatus"  write the "opstatus" file: one tab-separated line of
#               name=value pairs per group/service
#   "all"       both of the above
#
# Failures are reported via syslog. A failure to write one file does
# not prevent the other file from being written.
#
sub save_state {
    my (@states) = @_;

    foreach my $state (@states) {
        if ($state eq "disabled" || $state eq "all") {
            if (open (my $fh, '>', "$CF{STATEDIR}/disabled")) {
                #
                # disabled hosts are the group members carrying
                # a leading "*"; the marker itself is not saved
                #
                foreach my $group (keys %groups) {
                    foreach my $member (@{$groups{$group}}) {
                        next unless (substr ($member, 0, 1) eq "*");
                        print $fh "disable host " . substr ($member, 1) . "\n";
                    }
                }

                foreach my $group (keys %watch) {
                    if (exists $watch_disabled{$group} && $watch_disabled{$group} == 1) {
                        print $fh "disable watch $group\n";
                    }
                    foreach my $service (keys %{$watch{$group}}) {
                        if (defined $watch{$group}->{$service}->{'disable'}
                            && $watch{$group}->{$service}->{'disable'} == 1) {
                            print $fh "disable service $group $service\n";
                        }
                    }
                }

                # buffered write errors only surface at close time
                close ($fh)
                    or syslog ("err", "could not write to state file: $!");
            } else {
                syslog ("err", "could not write to state file: $!");
            }
        }

        if ($state eq "opstatus" || $state eq "all") {
            if (open (my $fh, '>', "$CF{STATEDIR}/opstatus")) {
                foreach my $group (keys %watch) {
                    foreach my $service (keys %{$watch{$group}}) {
                        print $fh "group=$group\tservice=$service";

                        # per-service variables, stored under "_<name>"
                        foreach my $var (qw(op_status failure_count alert_count last_success first_success
                                            consec_failures last_failure first_failure last_summary
                                            last_failure_time last_failure_summary last_failure_detail
                                            last_detail ack ack_comment last_trap last_traphost exitval
                                            last_check last_op_status failure_output trap_timer)) {
                            print $fh "\t$var=" . esc_str($watch{$group}->{$service}->{"_$var"});
                        }

                        # per-period variables, written as "<period>:<name>"
                        foreach my $periodlabel (keys %{$watch{$group}->{$service}->{periods}}) {
                            foreach my $var (qw(last_alert alert_sent 1stfailtime failcount)) {
                                print $fh "\t$periodlabel:$var=" . esc_str($watch{$group}->{$service}{periods}{$periodlabel}{"_$var"});
                            }
                        }
                        print $fh "\n";
                    }
                }

                close ($fh)
                    or syslog ("err", "could not write to opstatus state file: $!");
            } else {
                syslog ("err", "could not write to opstatus state file: $!");
            }
        }
    }
}
|
3890 |
|
|
|
3891 |
|
|
|
3892 |
|
|
#
# load_state (@states)
#
# Read back the files written by save_state from $CF{STATEDIR}. Each
# element of @states is "disabled", "opstatus" or "all".
#
#   disabled   every "disable host|watch|service ..." line is applied
#              through disen_host / disen_watch / disen_service
#   opstatus   every line is a tab-separated list of name=value
#              pairs; values are restored into %watch for
#              group/service (and period) entries which still exist
#
# Failures are reported via syslog. A failure to read one file does
# not prevent the other file from being loaded.
#
sub load_state {
    my (@states) = @_;

    foreach my $state (@states) {
        if ($state eq "disabled" || $state eq "all") {
            if (open (my $fh, '<', "$CF{STATEDIR}/disabled")) {
                while (defined (my $l = <$fh>)) {
                    chomp $l;
                    my ($cmd, $what, $args) = split (/\s+/, $l, 3);

                    next if (!defined $cmd || $cmd ne "disable");
                    next if (!defined $what);

                    if ($what eq "host") {
                        disen_host ($args);
                    } elsif ($what eq "watch") {
                        syslog ("err", "undefined watch reading state file: $l")
                            if (!defined disen_watch ($args));
                    } elsif ($what eq "service") {
                        my ($group, $service) = split (/\s+/, $args, 2);
                        syslog ("err",
                            "undefined group or service reading state file: $l")
                            if (!defined disen_service ($group, $service));
                    }
                }

                syslog ("info", "state '$state' loaded");
                close ($fh);
            } else {
                syslog ("err", "could not read state file: $!");
            }
        }

        if ($state eq "opstatus" || $state eq "all") {
            if (open (my $fh, '<', "$CF{STATEDIR}/opstatus")) {
                while (defined (my $l = <$fh>)) {
                    chomp $l;

                    #
                    # Split each field at its FIRST "=": the names
                    # written by save_state never contain one, but
                    # values (summaries, comments) may. Fields with
                    # no "=" at all are ignored.
                    #
                    my %opstatus;
                    foreach my $field (split (/\t/, $l)) {
                        my ($name, $value) = split (/=/, $field, 2);
                        next if (!defined $value);
                        $opstatus{$name} = $value;
                    }

                    my ($group, $service) = @opstatus{qw(group service)};
                    next unless (defined $group && exists $watch{$group}
                        && defined $service && exists $watch{$group}->{$service});

                    my $sref = $watch{$group}->{$service};

                    foreach my $op (keys %opstatus) {
                        next if ($op eq 'group' || $op eq 'service');
                        if ($op =~ /^(.*):(.*)$/) {
                            # "<period>:<name>" is a per-period variable
                            my ($periodlabel, $var) = ($1, $2);
                            next unless exists $sref->{periods}{$periodlabel};
                            $sref->{periods}{$periodlabel}{"_$var"} = un_esc_str($opstatus{$op});
                        } else {
                            $sref->{"_$op"} = un_esc_str($opstatus{$op});
                        }
                    }
                }
                syslog ("info", "state '$state' loaded");
                close ($fh);
            } else {
                syslog ("err", "could not read state file: $!");
            }
        }
    }
}
|
3956 |
|
|
|
3957 |
|
|
|
3958 |
|
|
#
# auth ($type, $user, $plaintext, $host)
#
# Authenticate a login. $type selects the method:
#
#   getpwnam    crypt(3) hash from getpwnam()
#   shadow      not implemented, always fails
#   userfile    "user: hash" lines in $CF{USERFILE}
#   pam         PAM service $CF{PAMSERVICE} via Authen::PAM
#   trustlocal  accept any user when $host is 127.0.0.1
#
# Returns 1 on success, undef on any failure.
#
sub auth {
    my ($type, $user, $plaintext, $host) = @_;

    if ($user eq "" || ($type ne 'trustlocal' && $plaintext eq "")) {
        syslog ('err', "an undef username or password supplied");
        return undef;
    }

    #
    # Check $plaintext against a stored crypt(3) hash. An empty or
    # missing stored hash never matches, and neither does a failed
    # crypt(): depending on the C library crypt() may return undef or
    # "" for an unusable salt, which would otherwise compare equal to
    # an empty stored hash and accept any password.
    #
    my $password_ok = sub {
        my ($stored) = @_;
        return 0 if (!defined $stored || $stored eq "");
        my $hashed = crypt ($plaintext, $stored);
        return (defined $hashed && $hashed eq $stored) ? 1 : 0;
    };

    #
    # standard UNIX passwd
    #
    if ($type eq "getpwnam") {
        my $pass = (getpwnam ($user))[1];
        return undef if (!$password_ok->($pass));
        return 1;

    #
    # shadow password
    #
    } elsif ($type eq "shadow") {
        # no implementation; falls through to the failure return

    #
    # "mon" authentication
    #
    } elsif ($type eq "userfile") {
        my $fh;
        if (!open ($fh, '<', $CF{"USERFILE"})) {
            syslog ('err', "could not open user file '$CF{USERFILE}': $!");
            return undef;
        }
        my %hash_of;
        while (defined (my $line = <$fh>)) {
            next if ($line =~ /^\s*#/ || $line =~ /^\s*$/);
            chomp $line;
            my ($name, $hash) = split (/\s*:\s*/, $line, 2);
            $hash_of{$name} = $hash;
        }
        close ($fh);
        return undef if (!defined ($hash_of{$user}));       # user was not found in userfile
        return undef if (!$password_ok->($hash_of{$user})); # user gave wrong password
        return 1;

    #
    # PAM authentication
    #
    } elsif ($type eq "pam") {
        # pam_conv_func is handed to PAM as the conversation callback;
        # the credentials are published in these two globals for it
        local $PAM_username = $user;
        local $PAM_password = $plaintext;
        my $pamh = Authen::PAM->new ($CF{'PAMSERVICE'}, $PAM_username, \&pam_conv_func);
        if (!ref ($pamh)) {
            syslog ('err', "Error code $pamh during PAM init!: $!");
            return undef;
        }
        my $res = $pamh->pam_authenticate;
        return undef if ($res != &Authen::PAM::PAM_SUCCESS);
        return 1;

    } elsif ($type eq "trustlocal") {
        # We're configured to trust all authentications from localhost
        # i.e. cgi scripts are handling authentication themselves
        return undef if ($host ne "127.0.0.1");
        return 1;

    } else {
        syslog ('err', "authentication type '$type' not known");
    }

    return undef;
}
|
4033 |
|
|
|
4034 |
|
|
|
4035 |
|
|
#
# load_auth ($startup)
#
# Load the table of who can do which commands from $CF{AUTHFILE},
# replacing the contents of %AUTHCMDS, %NOAUTHCMDS and %AUTHTRAPS.
#
# The file has two kinds of sections, introduced by the lines
# "command section" (the default) and "trap section":
#
#   command section   "cmd: user1, !user2, AUTH_ANY"
#                     "user" allows, "!user" denies; command names
#                     are stored in lower case
#   trap section      "host user password"
#                     host is "*", a dotted quad or a host name; it
#                     is stored as "*" or as a dotted quad
#
# Errors go through err_startup when the file cannot be opened or a
# command line cannot be parsed (fatal if $startup is true), and to
# syslog otherwise. Returns undef if the file could not be opened.
#
sub load_auth {
    my ($startup) = @_;

    %AUTHCMDS = ();
    %NOAUTHCMDS = ();
    %AUTHTRAPS = ();
    my $sect = "command";

    my $fh;
    if (!open ($fh, '<', $CF{"AUTHFILE"})) {
        err_startup ($startup, "could not open $CF{AUTHFILE}: $!");
        return undef;
    }

    while (defined (my $l = <$fh>)) {
        next if ($l =~ /^\s*#/ || $l =~ /^\s*$/);
        chomp $l;
        $l =~ s/^\s*//;
        $l =~ s/\s*$//;

        if ($l =~ /^command\s+section/) {
            $sect = "command";
            next;
        } elsif ($l =~ /^trap\s+section/) {
            $sect = "trap";
            next;
        }

        if ($sect eq "command") {
            my ($cmd, $users) = split (/\s*:\s*/, $l, 2);
            if (!defined $users) {
                err_startup ($startup, "could not parse line $. of auth file\n");
                next;
            }
            foreach my $u (split (/\s*,\s*/, $users)) {
                if ($u ne "AUTH_ANY" && $u =~ /^!(.*)/) {
                    # Directive is to "deny-user"
                    $NOAUTHCMDS{lc ($cmd)}{$1} = 1;
                } else {
                    # Directive is to "allow-user"; AUTH_ANY (all
                    # authenticated users) is stored like a user
                    $AUTHCMDS{lc ($cmd)}{$u} = 1;
                }
            }

        } elsif ($sect eq "trap") {
            if ($l !~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
                syslog ('err', "invalid entry in trap sect of $CF{AUTHFILE}, line $.");
                next;
            }
            my ($host, $user, $password) = ($1, $2, $3);

            #
            # "*" allows traps from all hosts and is stored as is;
            # anything else must be a dotted quad or a host name,
            # and is normalised to a dotted quad
            #
            if ($host ne "*") {
                my $packed;
                if ($host =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
                    || $host =~ /^[A-Z\d](?:[A-Z\.\d\-]*[A-Z\d])?$/i) {
                    $packed = inet_aton ($host);
                }
                if (!defined $packed || $packed eq "") {
                    syslog ('err', "invalid host in $CF{AUTHFILE}, line $.");
                    next;
                }
                $host = inet_ntoa ($packed);
            }

            # the password (hash) is deliberately kept out of the log
            syslog ('notice', "Adding trap auth of: $host $user");
            $AUTHTRAPS{$host}{$user} = $password;

        } else {
            syslog ('err', "unknown section in $CF{AUTHFILE}: $l");
        }
    }
    close ($fh);
}
|
4126 |
|
|
|
4127 |
|
|
#
# placeholder: takes no action and returns nothing
#
sub load_view_users {
    return;
}
|
4128 |
|
|
|
4129 |
|
|
#
# view_match ($view, $group, $service)
#
# Return 1 if the group, or the "group:service" pair, is part of
# view $view in %views, 0 otherwise. With no view in use ($view
# undefined) everything matches.
#
sub view_match {
    my ($view, $group, $service) = @_;

    # no view in use
    return 1 if (!defined ($view));

    #
    # Fetch the view once. Looking inside $views{$view} directly
    # would create an empty entry for a view which does not exist.
    #
    my $members = $views{$view};
    return 0 if (!defined ($members));

    # view contains the whole group
    return 1 if (defined ($group) && defined ($members->{$group}));

    # view contains this group:service
    my $pair = (defined ($group) ? $group : "")
        . ":"
        . (defined ($service) ? $service : "");
    return 1 if (defined ($members->{$pair}));

    return 0;
}
|
4146 |
|
|
|
4147 |
|
|
#
# check_auth ($user, $cmd, $no_syslog)
#
# Return 1 if $user is permitted to perform $cmd, undef if not.
# $user is undefined for a client which has not authenticated.
#
# The optional third argument controls logging of the final
# "not authenticated" refusal: when it is defined, that refusal is
# not reported to syslog. e.g.,
#     check_auth("joe", "disable")
# complains to syslog if joe is not authorized, while
#     check_auth("joe", "disable", 1)
# does not. An explicit deny entry is always logged.
#
sub check_auth {
    my ($user, $cmd, $no_syslog) = @_;

    my $authenticated = defined ($user);

    #
    # explicit deny, either for this user or for every
    # authenticated user
    #
    if ($authenticated
        && ($NOAUTHCMDS{$cmd}{$user} || $NOAUTHCMDS{$cmd}{"AUTH_ANY"}))
    {
        syslog ("err", "user '$user' tried '$cmd', denied");
        return undef;
    }

    #
    # "all" allows any client, authenticated or not
    #
    if ($AUTHCMDS{$cmd}{"all"}) {
        return 1;
    }

    if ($authenticated) {
        #
        # AUTH_ANY allows any authenticated user
        #
        return 1 if ($AUTHCMDS{$cmd}{"AUTH_ANY"});

        #
        # this user is specifically allowed
        #
        return 1 if ($AUTHCMDS{$cmd}{$user});
    }

    if (!defined ($no_syslog)) {
        syslog ("err", "user '$user' tried '$cmd', not authenticated");
    }

    return undef;
}
|
4196 |
|
|
|
4197 |
|
|
|
4198 |
|
|
#
# reload (@what)
#
# Reload the named things, in order. The only name understood is
# "auth", which calls load_auth. Processing stops with undef at the
# first unknown name; returns 1 once every name has been handled.
#
sub reload {
    my (@what) = @_;

    foreach my $item (@what) {
        return undef unless ($item eq "auth");
        load_auth ();
    }

    return 1;
}
|
4214 |
|
|
|
4215 |
|
|
|
4216 |
|
|
#
# err_startup ($startup, $msg)
#
# Report an error: die with $msg when $startup is true, otherwise
# send $msg to syslog at level "err".
#
sub err_startup {
    my ($startup, $msg) = @_;

    die "$msg\n" if ($startup);

    syslog ('err', $msg);
}
|
4225 |
|
|
|
4226 |
|
|
|
4227 |
|
|
#
# handle_trap ($buf, $from)
#
# Handle a trap. $buf is the received datagram, one "tag=value" per
# line; $from is the packed sender address returned by recv.
#
# MON-specific tags
#     pro     protocol
#     aut     auth
#     usr     username
#     pas     password
#     typ     type ("failure", "up", "startup", "trap", "traptimeout")
#     spc     specific type (STAT_OK, etc.) THIS IS NO LONGER USED
#     seq     sequence
#     grp     group
#     svc     service
#     hst     host
#     sta     status (same as exit status of a monitor)
#     tsp     timestamp as time(2) value
#     sum     summary output
#     dtl     detail
#
# The trap is validated (sender and user against %AUTHTRAPS, protocol
# version, presence of "sta", known group/service) and then passed to
# process_event. Returns undef (or an empty return for an unknown
# group/service) when the trap is rejected.
#
sub handle_trap {
    my ($buf, $from) = @_;

    my $time = time;
    my %trap = ();
    my $intended;

    #
    # parse the tag=value lines
    #
    foreach my $line (split (/\n/, $buf)) {
        if ($line =~ /^(\w+)=(.*)/) {
            my ($trap_name, $trap_val) = ($1, $2);
            chomp $trap_val;
            $trap_val =~ s/^\'(.*)\'$/$1/;
            $trap{$trap_name} = un_esc_str ($trap_val);
        } else {
            syslog ('err', "unspecified tag in trap: $line");
        }
    }

    $trap{"sum"} = "$trap{sum}\n" if ($trap{"sum"} !~ /\n$/);

    my ($port, $addr) = sockaddr_in ($from);
    my $fromip = inet_ntoa ($addr);

    #
    # trap authentication: a "*" host entry applies to every sender
    #
    my $traphost = defined ($AUTHTRAPS{"*"}) ? "*" : $fromip;

    #
    # The host check must come before any look inside
    # $AUTHTRAPS{$traphost}: a nested lookup creates the entry, which
    # would both defeat this test and add one %AUTHTRAPS key per
    # sender address.
    #
    if (!defined ($AUTHTRAPS{$traphost})) {
        syslog ('err', "received trap from unauthorized host: $fromip");
        return undef;
    }

    # a "*" user entry accepts any user without a password
    my ($trapuser, $trappass);
    if (defined ($AUTHTRAPS{$traphost}{"*"})) {
        $trapuser = "*";
        $trappass = "";
    } else {
        $trapuser = $trap{"usr"};
        $trappass = $trap{"pas"};
    }

    if ($trapuser ne "*") {
        my $stored = $AUTHTRAPS{$traphost}{$trapuser};
        my $hashed = defined ($stored) ? crypt ($trappass, $stored) : undef;

        if (!defined ($hashed) || $hashed ne $stored) {
            syslog ('err', "received trap from unauthorized user $trapuser, host $traphost");
            return undef;
        }
    }

    #
    # protocol version
    #
    if ($trap{"pro"} < $TRAP_PRO_VERSION) {
        syslog ('err', "cannot handle traps from version less than $TRAP_PRO_VERSION");
        return undef;
    }

    #
    # validate trap type
    #
    if (!defined $trap{"sta"}) {
        syslog ('err', "no trap sta value specified from $fromip");
        return undef;
    }

    #
    # if mon receives a trap for an unknown group/service, then the
    # default/default group/service should catch these if it is defined
    #
    if (!defined $watch{$trap{"grp"}} && defined $watch{"default"}) {
        $intended = "$trap{'grp'}:$trap{'svc'}";
        $trap{"grp"} = "default";
    }

    if ($trap{"grp"} eq 'default'
        && !defined($watch{default}->{$trap{"svc"}})
        && defined($watch{'default'}->{'default'}))
    {
        $trap{"svc"} = "default";
    }

    if (!defined ($groups{$trap{"grp"}})) {
        syslog ('err', "trap received for undefined group $trap{grp}");
        return;
    } elsif (!defined $watch{$trap{"grp"}}->{$trap{"svc"}}) {
        syslog ('err', "trap received for undefined service type $trap{grp}/$trap{svc}");
        return;
    }

    #
    # trap has been validated, proceed
    #
    my $sref = \%{$watch{$trap{"grp"}}->{$trap{"svc"}}};

    #
    # a trap received resets the trap timeout timer
    #
    if (exists $sref->{"traptimeout"}) {
        $sref->{"_trap_timer"} = $sref->{"traptimeout"};
    }

    $sref->{"_last_trap"} = $time;

    # remember the group:service the sender asked for when the trap
    # was redirected to "default"; cleared again after the event
    if ($intended) {
        $sref->{"_intended"} = $intended;
    }

    syslog ('info', "trap $trap{typ} $trap{spc} from " .
        "$fromip grp=$trap{grp} svc=$trap{svc}, sta=$trap{sta}\n");

    $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
        if ($sref->{"trapduration"});

    process_event ("t", $trap{"grp"}, $trap{"svc"}, $trap{"sta"}, "$trap{sum}\n$trap{dtl}");

    if (defined ($sref->{"_intended"})) {
        undef ($sref->{"_intended"});
    }
}
|
4409 |
|
|
|
4410 |
|
|
|
4411 |
|
|
#
# handle_trap_timeout ($group, $service)
#
# trap timeout: re-arm the service's trap timer from its
# "traptimeout" value and hand a failure event (type "T", status 1)
# to process_event.
#
sub handle_trap_timeout {
    my ($group, $service) = @_;

    my $now = time;
    my $svc = \%{$watch{$group}->{$service}};

    $svc->{"_trap_timer"} = $svc->{"traptimeout"};

    my $summary = "trap timeout\n";
    my $detail = "trap timeout after " . $svc->{"traptimeout"} . "s at " . localtime ($now) . "\n";

    process_event ("T", $group, $service, 1, $summary . $detail);
}
|
4426 |
|
|
|
4427 |
|
|
|
4428 |
|
|
#
# sock_write ($sock, $buf)
#
# Write all of $buf to the socket (or any file handle) $sock,
# retrying partial writes until nothing is left.
#
# A write interrupted by a signal (EINTR) is retried at once; a
# handle which is not ready (EAGAIN) is retried after a 0.1s pause.
# Any other error abandons the write.
#
# Returns 1 when everything was written, undef on error.
#
sub sock_write {
    my ($sock, $buf) = @_;

    my $nleft = length ($buf);

    while ($nleft) {
        my $nwritten = syswrite ($sock, $buf, $nleft);

        if (!defined ($nwritten)) {
            # interrupted before anything was written: just retry
            next if ($! == EINTR);
            return undef if ($! != EAGAIN);
            usleep (100000);
            next;
        }

        # drop what was written from the front of the buffer
        $nleft -= $nwritten;
        substr ($buf, 0, $nwritten) = "";
    }

    return 1;
}
|
4447 |
|
|
|
4448 |
|
|
|
4449 |
|
|
#
# handle_io ()
#
# do I/O processing for traps and client connections
#
# Waits in select() on the trap socket, the server socket and every
# client in %clients, servicing whatever becomes ready, until
# $SLEEPINT seconds have passed or select() times out. Afterwards
# clients which have been silent for $CF{CLIENT_TIMEOUT} seconds are
# closed.
#
sub handle_io {

    #
    # build iovec for server connections, traps, and clients
    #
    $iovec = '';
    my $niovec = '';
    vec ($iovec, fileno (TRAPSERVER), 1) = 1;
    vec ($iovec, fileno (SERVER), 1) = 1;
    foreach my $cl (keys %clients) {
        vec ($iovec, $cl, 1) = 1;
    }

    #
    # handle client I/O while there is some to handle
    #
    my $sleep = $SLEEPINT;
    my $tm0 = [gettimeofday];
    my $n;
    while ($n = select ($niovec = $iovec, undef, undef, $sleep)) {
        my $tm1 = [gettimeofday];

        if ($n < 0) {
            #
            # select failed, so $niovec does not describe ready
            # handles. An interrupted call is simply retried after
            # the bookkeeping below; anything else ends the loop.
            #
            if ($! != &EINTR) {
                syslog ('err', "select returned an error for I/O loop: $!");
                last;
            }
        } else {
            #
            # mon trap
            #
            if (vec ($niovec, fileno (TRAPSERVER), 1)) {
                my ($from, $trapbuf);
                if (!defined ($from = recv (TRAPSERVER, $trapbuf, 65536, 0))) {
                    syslog ('err', "error trying to recv a trap: $!");
                } else {
                    handle_trap ($trapbuf, $from);
                }

            #
            # client connections
            #
            } elsif (vec ($niovec, fileno (SERVER), 1)) {
                client_accept ();
            }

            #
            # read data from clients if any exists
            #
            if ($numclients) {
                foreach my $cl (keys %clients) {
                    next if (!vec ($niovec, $cl, 1));

                    my $buf = '';
                    my $nread = sysread ($clients{$cl}->{"fhandle"}, $buf, 8192);

                    if (!defined ($nread)) {
                        # nothing to read after all: keep the client
                        next if ($! == &EAGAIN || $! == &EINTR);
                        client_close ($cl, "read error: $!");
                    } elsif ($nread == 0) {
                        # end of file: the client went away
                        client_close ($cl);
                    } else {
                        $clients{$cl}->{"buf"} .= $buf;
                        $clients{$cl}->{"timeout"} = $CF{"CLIENT_TIMEOUT"};
                        $clients{$cl}->{"last_read"} = time;
                    }
                }
            }
        }

        #
        # execute client commands which have been read
        #
        client_dopending () if ($numclients);

        #
        # Never stay here longer than $SLEEPINT in total. This is
        # reached on every pass, traps included, so that steady trap
        # traffic cannot keep the loop running indefinitely.
        #
        last if (tv_interval ($tm0, $tm1) >= $SLEEPINT);

        $sleep = $SLEEPINT - tv_interval ($tm0, $tm1);
    }

    #
    # count down client inactivity timeouts and close expired connections
    #
    if ($numclients) {
        foreach my $cl (keys %clients) {
            my $timenow = time;
            $clients{$cl}->{"timeout"} = $timenow - $clients{$cl}->{"last_read"};

            if ($clients{$cl}->{"timeout"} >= $CF{"CLIENT_TIMEOUT"}) {
                client_close ($cl, "timeout after $CF{CLIENT_TIMEOUT}s");
            }
        }
    }
}
|
4545 |
|
|
|
4546 |
|
|
|
4547 |
|
|
#
# gen_scriptdir_hash ()
#
# generate alert and monitor path hashes
#
# Rebuilds %MONITORHASH and %ALERTHASH, which map the program name of
# every monitor and alert referenced in %watch to its full path. The
# colon-separated directory lists $CF{SCRIPTDIR} (monitors) and
# $CF{ALERTDIR} (alerts) are searched in order and the first
# executable found wins. Unusable directories and programs which are
# not found are reported via syslog.
#
sub gen_scriptdir_hash {
    my (@scriptdirs, @alertdirs);

    %MONITORHASH = ();
    %ALERTHASH = ();

    foreach my $d (split (/\s*:\s*/, $CF{"SCRIPTDIR"})) {
        if (-d $d && -x $d) {
            push (@scriptdirs, $d);
        } else {
            syslog ('err', "scriptdir $d is not usable");
        }
    }

    foreach my $d (split (/\s*:\s*/, $CF{"ALERTDIR"})) {
        if (-d $d && -x $d) {
            push (@alertdirs, $d);
        } else {
            syslog ('err', "alertdir $d is not usable");
        }
    }

    # full path of the first executable $name in @{$dirs}, or undef
    my $locate = sub {
        my ($name, $dirs) = @_;
        foreach my $dir (@{$dirs}) {
            return "$dir/$name" if (-x "$dir/$name");
        }
        return undef;
    };

    # record an alert's path (first one found is kept), or complain
    my $register_alert = sub {
        my ($alert) = @_;
        my $path = $locate->($alert, \@alertdirs);
        if (defined $path) {
            $ALERTHASH{$alert} = $path
                unless (defined $ALERTHASH{$alert});
        } else {
            syslog ('err', "$alert not found in one of (\@alertdirs[@alertdirs])");
        }
    };

    #
    # monitors
    #
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            next if (!defined $watch{$group}->{$service}->{"monitor"});

            # the program is the first word of the monitor line
            my $monitor = (split (/\s+/, $watch{$group}->{$service}->{"monitor"}))[0];
            my $path = $locate->($monitor, \@scriptdirs);
            if (defined $path) {
                $MONITORHASH{$monitor} = $path
                    unless (defined $MONITORHASH{$monitor});
            } else {
                syslog ('err', "$monitor not found in one of (\@scriptdirs[@scriptdirs])");
            }
        }
    }

    #
    # alerts
    #
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            my $sref = $watch{$group}->{$service};

            if (defined $sref->{"redistribute"} && $sref->{"redistribute"} ne '') {
                $register_alert->($sref->{"redistribute"});
            }

            foreach my $period (keys %{$sref->{"periods"}}) {
                my $pref = $sref->{"periods"}->{$period};
                foreach my $my_alert (
                    @{$pref->{"alerts"}},
                    @{$pref->{"upalerts"}},
                    @{$pref->{"startupalerts"}},
                    @{$pref->{"ackalerts"}},
                    @{$pref->{"disablealerts"}},
                ) {
                    # skip leading "name=value " words; the next
                    # word is the alert program
                    my $alert = $my_alert;
                    $alert =~ s/^(\S+=\S+ )*(\S+).*$/$2/;
                    $register_alert->($alert);
                }
            }
        }
    }
}
|
4640 |
|
|
|
4641 |
|
|
|
4642 |
|
|
#
# normalize_paths ()
#
# do some processing on dirs
#
# Turns the relative paths in %CF into absolute ones and reports
# missing directories and files via syslog:
#
#   STATEDIR, LOGDIR         relative to BASEDIR
#   AUTHFILE, USERFILE       relative to CFBASEDIR (USERFILE only
#                            when AUTHTYPE includes "userfile")
#   DTLOGFILE, HISTORICFILE  relative to LOGDIR (HISTORICFILE only
#                            when it is set)
#   SCRIPTDIR, ALERTDIR      colon-separated lists, every element
#                            relative to BASEDIR
#
sub normalize_paths {

    # $path itself when it starts with "/", else $path below $base
    my $rooted = sub {
        my ($base, $path) = @_;
        return ($path !~ m{^/}) ? "$base/$path" : $path;
    };

    #
    # do some sanity checks on dirs
    #
    $CF{"STATEDIR"} = $rooted->($CF{"BASEDIR"}, $CF{"STATEDIR"});
    if (! -d $CF{"STATEDIR"}) {
        syslog ('err', "$CF{STATEDIR} does not exist");
    }

    $CF{"LOGDIR"} = $rooted->($CF{"BASEDIR"}, $CF{"LOGDIR"});
    if (! -d $CF{"LOGDIR"}) {
        syslog ('err', "$CF{LOGDIR} does not exist");
    }

    $CF{"AUTHFILE"} = $rooted->($CF{"CFBASEDIR"}, $CF{"AUTHFILE"});
    if (! -f $CF{"AUTHFILE"}) {
        syslog ('err', "$CF{AUTHFILE} does not exist");
    }

    foreach my $authtype (split (' ', $CF{"AUTHTYPE"})) {
        next unless ($authtype eq "userfile");

        $CF{"USERFILE"} = $rooted->($CF{"CFBASEDIR"}, $CF{"USERFILE"});
        if (! -f $CF{"USERFILE"}) {
            syslog ('err', "$CF{USERFILE} does not exist");
        }
    }

    $CF{"DTLOGFILE"} = $rooted->($CF{"LOGDIR"}, $CF{"DTLOGFILE"});

    if ($CF{"HISTORICFILE"} ne "") {
        $CF{"HISTORICFILE"} = $rooted->($CF{"LOGDIR"}, $CF{"HISTORICFILE"});
    }

    #
    # script and alert dirs may have multiple paths
    #
    foreach my $key ("SCRIPTDIR", "ALERTDIR") {
        my @normalized;
        foreach my $entry (split (/\s*:\s*/, $CF{$key})) {
            my $d = $entry;
            $d =~ s{/$}{};
            $d = $rooted->($CF{"BASEDIR"}, $d);
            if (! -d $d) {
                syslog ('err', "$d does not exist, check your alertdir and mondir paths");
            }
            push (@normalized, $d);
        }
        $CF{$key} = join (":", @normalized);
    }
}
|
4697 |
|
|
|
4698 |
|
|
|
4699 |
|
|
#
|
4700 |
|
|
# set opstatus and save old status
|
4701 |
|
|
#
|
4702 |
|
|
sub set_op_status {
    my ($group, $service, $status) = @_;

    # the service record is created on demand if it does not exist yet
    my $svc = \%{$watch{$group}->{$service}};

    # remember the status we are about to replace, then store the new one
    $svc->{"_last_op_status"} = $svc->{"_op_status"};
    $svc->{"_op_status"} = $status;
}
|
4709 |
|
|
|
4710 |
|
|
|
4711 |
|
|
sub debug_dir {
    #
    # Dump the effective path configuration, followed by the monitor
    # and alert lookup tables, to STDERR.
    #
    print STDERR <<EOF;
basedir [$CF{BASEDIR}]
cfbasedir [$CF{CFBASEDIR}]

cf [$CF{CF}]
statedir [$CF{STATEDIR}]
logdir [$CF{LOGDIR}]
authfile [$CF{AUTHFILE}]
userfile [$CF{USERFILE}]
dtlogfile [$CF{DTLOGFILE}]
historicfile[$CF{HISTORICFILE}]
monerrfile [$CF{MONERRFILE}]
scriptdir [$CF{SCRIPTDIR}]
alertdir [$CF{ALERTDIR}]
EOF

    # "M name=[path]" for every known monitor ...
    print STDERR "M $_=[$MONITORHASH{$_}]\n" for (keys %MONITORHASH);

    # ... and "A name=[path]" for every known alert
    print STDERR "A $_=[$ALERTHASH{$_}]\n" for (keys %ALERTHASH);
}
|
4735 |
|
|
|
4736 |
|
|
|
4737 |
|
|
#
|
4738 |
|
|
# globals affected by config file are
|
4739 |
|
|
# all stored in %CF
|
4740 |
|
|
#
|
4741 |
|
|
sub init_cf_globals {
    #
    # Load %CF with the built-in defaults.  Command line options
    # (-b, -B, -c, -L, -O, read from %opt) take precedence where a
    # setting has one.  Relative paths are left relative here.
    #
    $CF{"BASEDIR"} = $opt{"b"} || "/usr/lib/mon";
    $CF{"BASEDIR"} =~ s{/$}{};
    $CF{"CFBASEDIR"} = $opt{"B"} || "/etc/mon";
    $CF{"CF"} = $opt{"c"} || "$CF{CFBASEDIR}/mon.cf";
    $CF{"CF"} = "$PWD/$CF{CF}" if ($CF{"CF"} !~ /^\//);
    $CF{"SCRIPTDIR"} = "/usr/local/lib/mon/mon.d:mon.d";
    $CF{"ALERTDIR"} = "/usr/local/lib/mon/alert.d:alert.d";
    $CF{"LOGDIR"} = $opt{"L"} || (-d "/var/log/mon" ? "/var/log/mon" : "log.d");

    # first existing system state directory wins, else a relative one
    $CF{"STATEDIR"} = -d "/var/state/mon" ? "/var/state/mon"
                    : -d "/var/lib/mon"   ? "/var/lib/mon"
                    :                       "state.d";

    $CF{"AUTHFILE"} = "auth.cf";
    $CF{"AUTHTYPE"} = "getpwnam";
    $CF{"PAMSERVICE"} = "passwd";
    $CF{"USERFILE"} = "monusers.cf";

    # pid file goes into the first existing run directory
    $CF{"PIDFILE"} = (-d "/var/run/mon" ? "/var/run/mon"
                    : -d "/var/run"     ? "/var/run"
                    :                     "/etc") . "/mon.pid";

    $CF{"MONERRFILE"} = "/dev/null";
    $CF{"DTLOGFILE"} = "downtime.log";
    $CF{"DTLOGGING"} = 0;
    $CF{"MAX_KEEP"} = 100;
    $CF{"CLIENT_TIMEOUT"} = 30;

    # use the registered "mon" service ports when the system knows them
    $CF{"SERVPORT"} = getservbyname ("mon", "tcp") || 2583;
    $CF{"TRAPPORT"} = getservbyname ("mon", "udp") || 2583;

    # Regular expression for a dotted quad.  The dots are escaped so
    # they match a literal "." only; unescaped they match any character.
    $CF{"CLIENTALLOW"} = '\d+\.\d+\.\d+\.\d+';

    $CF{"MAXPROCS"} = 0;
    $CF{"HISTORICFILE"} = "";
    $CF{"HISTORICTIME"} = 0;
    $CF{"DEP_RECUR_LIMIT"} = 10;
    $CF{"SYSLOG_FACILITY"} = $opt{"O"} || "daemon";
    $CF{"STARTUPALERTS_ON_RESET"} = 0;
    $CF{"MONREMOTE"} = undef;
}
|
4776 |
|
|
|
4777 |
|
|
|
4778 |
|
|
#
|
4779 |
|
|
# globals not affected by config file
|
4780 |
|
|
#
|
4781 |
|
|
sub init_globals {
    #
    # Reset every global that the configuration file cannot change.
    #
    $TRAP_PRO_VERSION = 0.3807;
    $PROT_VERSION = 0x2611;

    ($SLEEPINT, $STOPPED, $STOPPED_TIME) = (1, 0, 0);

    $START_TIME = time ();
    $HOSTNAME = hostname ();
    $PWD = getcwd ();

    #
    # flags: one bit each, 1 through 256
    #
    ($FL_MONITOR,
     $FL_UPALERT,
     $FL_TRAP,
     $FL_TRAPTIMEOUT,
     $FL_STARTUPALERT,
     $FL_TEST,
     $FL_REDISTRIBUTE,
     $FL_ACKALERT,
     $FL_DISABLEALERT) = map { 1 << $_ } (0 .. 8);

    #
    # specific trap types, numbered 0 through 7
    #
    ($TRAP_COLDSTART,
     $TRAP_WARMSTART,
     $TRAP_LINKDOWN,
     $TRAP_LINKUP,
     $TRAP_AUTHFAIL,
     $TRAP_EGPNEIGHBORLOSS,
     $TRAP_ENTERPRISE,
     $TRAP_HEARTBEAT) = (0 .. 7);

    #
    # operational statuses, numbered 0 through 9
    #
    ($STAT_FAIL,
     $STAT_OK,
     $STAT_COLDSTART,
     $STAT_WARMSTART,
     $STAT_LINKDOWN,
     $STAT_UNKNOWN,
     $STAT_TIMEOUT,
     $STAT_UNTESTED,
     $STAT_DEPEND,
     $STAT_WARN) = (0 .. 9);

    #
    # status classification sets (status number => 1)
    #
    %FAILURE = map { ($_, 1) } ($STAT_FAIL, $STAT_LINKDOWN, $STAT_TIMEOUT);

    %SUCCESS = map { ($_, 1) } ($STAT_OK, $STAT_COLDSTART, $STAT_WARMSTART,
                                $STAT_UNKNOWN, $STAT_UNTESTED);

    %WARNING = map { ($_, 1) } ($STAT_COLDSTART, $STAT_WARMSTART,
                                $STAT_UNKNOWN, $STAT_WARN);

    #
    # status name => status number
    #
    %OPSTAT = (
        "fail"      => $STAT_FAIL,
        "ok"        => $STAT_OK,
        "coldstart" => $STAT_COLDSTART,
        "warmstart" => $STAT_WARMSTART,
        "linkdown"  => $STAT_LINKDOWN,
        "unknown"   => $STAT_UNKNOWN,
        "timeout"   => $STAT_TIMEOUT,
        "untested"  => $STAT_UNTESTED,
    );

    #
    # fast lookup hashes for alerts and monitors
    #
    %MONITORHASH = ();
    %ALERTHASH = ();
}
|
4848 |
|
|
|
4849 |
|
|
|
4850 |
|
|
#
|
4851 |
|
|
# clear timers
|
4852 |
|
|
#
|
4853 |
|
|
sub clear_timers {
    my ($group, $service) = @_;

    #
    # Reload the countdown timers of one service from their configured
    # values and zero its failure bookkeeping.  Returns undef when the
    # service is not defined.
    #
    my $svc = $watch{$group}->{$service};
    return undef unless (defined $svc);

    # each timer is reloaded only when its configured value is set
    foreach my $reload (["_trap_timer",          "traptimeout"],
                        ["_trap_duration_timer", "trapduration"],
                        ["_timer",               "interval"]) {
        my ($timer, $setting) = @{$reload};
        $svc->{$timer} = $svc->{$setting} if ($svc->{$setting});
    }

    $svc->{"_consec_failures"} = 0 if ($svc->{"_consec_failures"});

    # per-period state is zeroed only when the matching option is in use
    foreach my $pref (values %{$svc->{"periods"}}) {
        foreach my $reset (["_last_alert",      "alertevery"],
                           ["_consec_failures", "alertafter_consec"],
                           ["_1stfailtime",     "alertafterival"]) {
            my ($state, $option) = @{$reset};
            $pref->{$state} = 0 if ($pref->{$option});
        }
    }
}
|
4885 |
|
|
|
4886 |
|
|
|
4887 |
|
|
#
|
4888 |
|
|
# load some amount of the alert history into memory
|
4889 |
|
|
#
|
4890 |
|
|
sub readhistoricfile {
    #
    # Replace @last_alerts with the entries of the alert history file.
    # Blank lines and "#" comments are skipped.  Entries older than
    # HISTORICTIME seconds are dropped when HISTORICTIME is non-zero,
    # and at most MAX_KEEP of the newest entries are retained.
    #
    return if ($CF{"HISTORICFILE"} eq "");

    # 3-arg open: the configured name is used verbatim as a file name
    my $hist;
    if (!open ($hist, '<', $CF{"HISTORICFILE"})) {
        syslog ('err', "Could not read history from $CF{HISTORICFILE} : $!");
        return;
    }

    my $epochLimit = 0;
    if ($CF{"HISTORICTIME"} != 0) {
        $epochLimit = time - $CF{"HISTORICTIME"};
    }

    @last_alerts = ();

    while (my $line = <$hist>) {
        next if ($line =~ /^\s*$/ || $line =~ /^\s*#/);
        chomp $line;

        # the fourth whitespace-separated field is the alert's epoch time
        my $epochAlert = (split (/\s+/, $line))[3];
        push (@last_alerts, $line) if ($epochAlert >= $epochLimit);
    }

    close ($hist);

    if (defined $CF{"MAX_KEEP"}) {
        #
        # Drop the oldest entries only when there really are too many.
        # A negative LENGTH tells splice to keep just that many elements
        # at the end, which would throw away valid history.
        #
        my $excess = @last_alerts - $CF{"MAX_KEEP"};
        splice (@last_alerts, 0, $excess) if ($excess > 0);
    }
}
|
4918 |
|
|
|
4919 |
|
|
|
4920 |
|
|
#
|
4921 |
|
|
# This routine simply calls an alert.
|
4922 |
|
|
#
|
4923 |
|
|
# call with %args = (
|
4924 |
|
|
# group => "name of group",
|
4925 |
|
|
# service => "name of service",
|
4926 |
|
|
# pref => "optional period reference",
|
4927 |
|
|
# alert => "alert script",
|
4928 |
|
|
# args => "args to alert script",
|
4929 |
|
|
# flags => "flags, as in $FL_*",
|
4930 |
|
|
# retval => "return value of monitor",
|
4931 |
|
|
# output => "output of monitor",
|
4932 |
|
|
# )
|
4933 |
|
|
#
|
4934 |
|
|
sub call_alert {
    my (%args) = @_;

    #
    # Run one alert script for a group/service and record that it ran.
    #
    # The alert runs in a grandchild: the first fork lets the master
    # carry on at once, the child then writes the monitor output to the
    # alert's stdin through a pipe (which may block).
    #
    # Returns undef when a mandatory argument is missing, when the alert
    # is not registered in %ALERTHASH or is not a plain file, or when
    # the first fork fails.  Returns 1 otherwise.
    #
    foreach my $mandatory_arg (qw(group service flags retval alert output)) {
        if (!exists $args{$mandatory_arg}) {
            debug (1, "returning from call_alert because of missing arg $mandatory_arg\n");
            return (undef);
        }
    }

    # entries of the group list that start with "*" are not handed over
    my @groupargs = grep (!/^\*/, @{$groups{$args{"group"}}});

    my $tmnow = time;

    # the summary is the first line of the monitor output
    my ($summary) = split ("\n", $args{"output"});
    $summary = "(NO SUMMARY)" if (!defined $summary || $summary =~ /^\s*$/m);

    my $sref = \%{$watch{$args{"group"}}->{$args{"service"}}};
    my $pref = $args{"pref"};

    $args{"args"} = '' if (!defined $args{"args"});

    my $alert = $ALERTHASH{$args{"alert"}};
    if (!defined $alert || ! -f $alert) {
        syslog ('err', "no alert found while trying to run $args{alert}");
        return undef;
    }

    #
    # $alerttype goes to syslog and @last_alerts, $alert_type becomes
    # MON_ALERTTYPE.  The first matching flag in this order wins.
    #
    my ($alerttype, $alert_type) = ("alert", "failure");
    foreach my $kind ([$FL_UPALERT,      "upalert",          "up"],
                      [$FL_STARTUPALERT, "startupalert",     "startup"],
                      [$FL_ACKALERT,     "ackalert",         "ack"],
                      [$FL_DISABLEALERT, "disablealert",     "disable"],
                      [$FL_TRAPTIMEOUT,  "traptimeoutalert", "traptimeout"],
                      [$FL_TRAP,         "trapalert",        "trap"],
                      [$FL_TEST,         "testalert",        "test"]) {
        next unless ($args{"flags"} & $kind->[0]);
        ($alerttype, $alert_type) = ($kind->[1], $kind->[2]);
        last;
    }

    #
    # log why we are triggering an alert
    #
    my $alert_name = $alert;
    $alert_name =~ s{^.*/([^/]+)$}{$1};
    syslog ("alert", "calling $alerttype $alert_name for" .
        " $args{group}/$args{service} ($alert,$args{args}) $summary")
        if (!($args{"flags"} & $FL_REDISTRIBUTE));

    #
    # We may block while writing to the alert script, so fork first and
    # let the master process move on.
    #
    my $pid = fork ();
    if (!defined $pid) {
        # nothing was run, so do not tally this as a delivered alert
        syslog ('err', "could not fork to run alert $alert: $!");
        return undef;
    }

    if ($pid == 0) {
        #
        # Child.  It must never return into the caller's code, or a
        # second copy of the daemon would keep running; every path
        # below ends in exec or exit.
        #
        my $alert_pid = open (ALERT, "|-");
        if (!defined $alert_pid) {
            syslog ('err', "could not fork: $!");
            exit (1);
        }

        #
        # grandchild, the actual alert
        #
        if ($alert_pid == 0) {
            #
            # set env variables to pass to the alert
            #
            foreach my $v (keys %{$sref->{"ENV"}}) {
                $ENV{$v} = $sref->{"ENV"}->{$v};
            }

            # service state, exported only when the value itself is defined
            my %service_env = (
                "MON_LAST_SUMMARY"  => "_last_summary",
                "MON_LAST_OUTPUT"   => "_last_output",
                "MON_LAST_FAILURE"  => "_last_failure",
                "MON_FIRST_FAILURE" => "_first_failure",
                "MON_FIRST_SUCCESS" => "_first_success",
                "MON_LAST_SUCCESS"  => "_last_success",
                "MON_DESCRIPTION"   => "description",
                "MON_OPSTATUS"      => "_op_status",
                "MON_LAST_OPSTATUS" => "_last_op_status",
            );
            foreach my $var (keys %service_env) {
                my $value = $sref->{$service_env{$var}};
                $ENV{$var} = $value if (defined $value);
            }

            $ENV{"MON_GROUP"} = $args{"group"} if (defined $args{"group"});
            $ENV{"MON_SERVICE"} = $args{"service"} if (defined $args{"service"});
            $ENV{"MON_RETVAL"} = $args{"retval"} if (defined $args{"retval"});
            $ENV{"MON_ACK"} = $sref->{"_ack_comment"}
                if ($sref->{"_ack"} && $sref->{"_ack_comment"} ne "");
            $ENV{"MON_ALERTTYPE"} = $alert_type;
            $ENV{"MON_STATEDIR"} = $CF{"STATEDIR"};
            $ENV{"MON_LOGDIR"} = $CF{"LOGDIR"};
            $ENV{"MON_CFBASEDIR"} = $CF{"CFBASEDIR"};

            if (defined ($sref->{"_intended"})) {
                $ENV{"MON_TRAP_INTENDED"} = $sref->{"_intended"};
            } else {
                # remove the variable; assigning undef would leave it set
                delete $ENV{"MON_TRAP_INTENDED"};
            }

            # alert type switch; when several flags are set the last one wins
            my $t;
            $t = "-u" if ($args{"flags"} & $FL_UPALERT);
            $t = "-a" if ($args{"flags"} & $FL_ACKALERT);
            $t = "-D" if ($args{"flags"} & $FL_DISABLEALERT);
            $t = "-T" if ($args{"flags"} & $FL_TRAP);
            $t = "-O" if ($args{"flags"} & $FL_TRAPTIMEOUT);

            my @execargs = (
                $alert,
                "-s", "$args{service}",
                "-g", "$args{group}",
                "-h", "@groupargs",
                "-t", "$tmnow",
            );

            push @execargs, $t if ($t);

            if ($args{"args"} ne "") {
                push @execargs, quotewords ('\s+', 0, $args{"args"});
            }

            # exec only returns on failure
            exec (@execargs)
                or syslog ('err', "could not exec alert $alert: $!");
            exit (1);
        }

        #
        # this will block if the alert is slow to read its input, which
        # is why we forked above
        #
        print ALERT $args{"output"};
        close (ALERT);
        exit;
    }

    #
    # Master from here on.
    # test alerts and redistributions don't count
    #
    return (1) if ($args{"flags"} & ($FL_TEST | $FL_REDISTRIBUTE));

    #
    # tally this alert
    #
    if (defined $args{"pref"}) {
        $pref->{"_last_alert"} = $tmnow;
    }
    $sref->{"_alert_count"}++;

    #
    # store this in the log
    #
    shift @last_alerts if (@last_alerts > $CF{"MAX_KEEP"});

    my $alertline = "$alerttype $args{group} $args{service}" .
        " $tmnow $alert ($args{args}) $summary";
    push @last_alerts, $alertline;

    #
    # append to alert history file
    #
    if ($CF{"HISTORICFILE"} ne "") {
        my $hist;
        if (!open ($hist, '>>', $CF{"HISTORICFILE"})) {
            syslog ('err', "Could not append alert history to $CF{HISTORICFILE}: $!");
        } else {
            print {$hist} $alertline, "\n";
            close ($hist);
        }
    }

    return 1;
}
|
5134 |
|
|
|
5135 |
|
|
|
5136 |
|
|
#
|
5137 |
|
|
# recursively evaluate a dependency expression
|
5138 |
|
|
# substitutes "GROUP:SERVICE" with "1" or "0" if the service is pass/fail, resp.
|
5139 |
|
|
#
|
5140 |
|
|
# returns an anonymous hash reference
|
5141 |
|
|
#
|
5142 |
|
|
# {
|
5143 |
|
|
# status =>, # "D" recursion depth exceeded
|
5144 |
|
|
# # "O" everything is OK
|
5145 |
|
|
# # "E" eval error
|
5146 |
|
|
# depend =>, # 1 for success (no deps in a failure state)
|
5147 |
|
|
# # 0 if any deps failed
|
5148 |
|
|
# error =>, # the textual error associated with "D" or "E" status
|
5149 |
|
|
# }
|
5150 |
|
|
#
|
5151 |
|
|
sub depend {
    my ($depend, $depth, $deptype) = @_;

    #
    # $depend  - dependency expression containing GROUP:SERVICE tokens
    # $depth   - current recursion depth, 0 for the top-level call
    # $deptype - dependency kind; compared with each service's
    #            "dep_behavior", 'a' and 'm' also select "alertdepend"
    #            and "monitordepend"
    #
    # Every GROUP:SERVICE token is replaced by 1 or 0 and the resulting
    # expression is evaluated.  Returns a hash reference with "status",
    # "depend" and, for the "D" and "E" statuses, "error".
    #
    debug (2, "checking DEP [$depend]\n");

    if ($depth > $CF{"DEP_RECUR_LIMIT"}) {
        return {
            status => "D",
            depend => undef,
            error  => "recursion too deep for ($depend)",
        };
    }

    foreach my $depstr ($depend =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g) {
        my ($group, $service) = split (':', $depstr);

        my $sref = \%{$watch{$group}->{$service}};
        my $depval = undef;

        # pick the sub-dependency of this service that applies to $deptype
        my $subdepend = "";
        if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq $deptype) {
            $subdepend = $sref->{"depend"};
        } elsif ($deptype eq 'a' && defined $sref->{"alertdepend"}) {
            $subdepend = $sref->{"alertdepend"};
        } elsif ($deptype eq 'm' && defined $sref->{"monitordepend"}) {
            $subdepend = $sref->{"monitordepend"};
        }

        #
        # disabled watches and services used to be counted as "passing"
        # now we'll use the actual values, to avoid having dependent services
        # alert when a broken service gets disabled
        #
        if ($subdepend eq "") {
            #
            # root dependency found: it passes when its status is a
            # success and its last failure is older than dep_memory
            #
            debug (2, " found root dep $group,$service\n");

            $depval = $SUCCESS{$sref->{"_op_status"}}
                && ($sref->{"_last_failure_time"} < (time - $sref->{"dep_memory"}));
        } else {
            #
            # not a root dep, do it recursively
            #
            my $dstatus = depend ($subdepend, $depth + 1, $deptype);
            debug (2,
                "recur depth $depth returned $dstatus->{status},$dstatus->{depend}\n");

            #
            # a bad thing happened, bail out
            #
            if ($dstatus->{"status"} ne "O") {
                debug (2,
                    "recursive dep failure for $group,$service (status=$dstatus->{status})\n");
                return $dstatus;
            }

            $depval = $dstatus->{"depend"} && $SUCCESS{$sref->{"_op_status"}}
                && ($sref->{"_last_failure_time"} < (time - $sref->{"dep_memory"}));
        }

        my $v = int ($depval);
        debug (2, " ($group,$service) $depth depend=[$v][$depend]");

        #
        # Quote the token: it may contain "." (and "-"), which must
        # match literally.  Unquoted, "web.1:ping" would also replace
        # "webx1:ping" and give that dependency the wrong value.
        #
        $depend =~ s/\b\Q$depstr\E\b/$v/g;
        debug (2, " depend=[$depend]\n");
    }

    #
    # NOTE(review): string eval of an expression that comes from the
    # configuration; it is only as trustworthy as the config file and
    # whatever $DEP_EVAL_SANDBOX prepends.
    #
    debug (2, " before eval: [$depend]");
    my $e = eval ("$DEP_EVAL_SANDBOX $depend");
    debug (2, " after eval: [$e]\n");

    if ($@ eq "") {
        return {
            status => "O",
            depend => $e,
        };
    }

    return {
        status => "E",
        depend => $e,
        error  => $@,
    };
}
|
5254 |
|
|
|
5255 |
|
|
|
5256 |
|
|
#
|
5257 |
|
|
# returns undef on error
|
5258 |
|
|
# 0 if dependency failure, sets _depend_status to 0
|
5259 |
|
|
# 1 if dependencies are OK, sets _depend_status to 1
|
5260 |
|
|
#
|
5261 |
|
|
sub dep_ok
{
    my ($sref, $deptype) = @_;

    #
    # Evaluate the dependency expression of one service for $deptype.
    # A service without an applicable expression is OK; in that case
    # "_depend_status" is left untouched.
    #
    my $depend = "";
    if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq $deptype) {
        $depend = $sref->{"depend"};
    } elsif ($deptype eq 'a' && defined $sref->{"alertdepend"}) {
        $depend = $sref->{"alertdepend"};
    } elsif ($deptype eq 'm' && defined $sref->{"monitordepend"}) {
        $depend = $sref->{"monitordepend"};
    }

    return 1 if ($depend eq "");

    my $s = depend ($depend, 0, $deptype);
    my $status = $s->{"status"};

    # recursion limit hit
    if ($status eq "D") {
        debug (2, "dep recursion too deep\n");
        return undef;
    }

    # the expression could not be evaluated
    if ($status eq "E") {
        syslog ("notice", "eval error for dependency starting at $depend: ".$s->{error});
        return undef;
    }

    # only a clean evaluation with a false result counts as a failure
    my $ok = ($status eq "O" && !$s->{"depend"}) ? 0 : 1;
    $sref->{"_depend_status"} = $ok;

    return $ok;
}
|
5299 |
|
|
|
5300 |
|
|
|
5301 |
|
|
#
|
5302 |
|
|
# returns undef on error
|
5303 |
|
|
# otherwise a reference to a list of summaries from all
|
5304 |
|
|
# DIRECT dependencies currently failing
|
5305 |
|
|
sub dep_summary
{
    my ($sref) = @_;

    #
    # Collect the summaries of the direct dependencies of a service
    # that are failing now or failed within their "dep_memory" window.
    # The tokens come from "depend" when dep_behavior is "hm",
    # otherwise from "hostdepend".
    #
    my $expr;
    if (defined $sref->{"depend"} && $sref->{"dep_behavior"} eq "hm") {
        $expr = $sref->{"depend"};
    } elsif (defined $sref->{"hostdepend"}) {
        $expr = $sref->{"hostdepend"};
    }

    my @deps = defined $expr
        ? ($expr =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g)
        : ();

    return [] unless (@deps);

    my @sum;

    foreach my $dep (@deps) {
        my ($group, $service) = split (/:/, $dep);

        # a token naming an unknown group or service is an error
        return undef
            unless (exists $watch{$group} && exists $watch{$group}->{$service});

        my $svc = \%{$watch{$group}->{$service}};

        if ($svc->{"_op_status"} == $STAT_FAIL) {
            push @sum, $svc->{"_last_summary"};
        } elsif ($svc->{"_last_failure_time"} >= (time - $svc->{"dep_memory"})) {
            push @sum, $svc->{"_last_failure_summary"};
        }
    }

    return \@sum;
}
|
5334 |
|
|
|
5335 |
|
|
#
|
5336 |
|
|
# convert a string to a hex-escaped string, returning
|
5337 |
|
|
# the escaped string.
|
5338 |
|
|
#
|
5339 |
|
|
# $str is the string to be escaped
|
5340 |
|
|
# if $inquotes is true, backslashes are doubled, making
|
5341 |
|
|
# the escaped string suitable to be enclosed in
|
5342 |
|
|
# single quotes and later passed to Text::ParseWords::quotewords.
|
5343 |
|
|
# For example, var='quoted value'
|
5344 |
|
|
#
|
5345 |
|
|
sub esc_str {
    my ($str, $inquotes) = @_;

    return "" if (!defined $str);

    #
    # Replacement for one character that needs attention: a backslash
    # is doubled when $inquotes is true and kept as it is otherwise,
    # anything else becomes a backslash plus its hex character code.
    #
    my $escape = sub {
        my ($c) = @_;

        if ($c eq "\\") {
            return $inquotes ? "\\\\" : $c;
        }

        return sprintf ("\\%02x", ord ($c));
    };

    #
    # Single pass, so backslashes produced by the escaping itself are
    # never looked at again.  The pattern picks out everything outside
    # 33..126 (control characters, space, DEL and above), both quote
    # characters and the backslash.
    #
    (my $escstr = $str) =~ s/([^\x21-\x7e]|["'\\])/$escape->($1)/ge;

    return $escstr;
}
|
5374 |
|
|
|
5375 |
|
|
|
5376 |
|
|
#
|
5377 |
|
|
# convert a hex-escaped string into an unescaped string,
|
5378 |
|
|
# returning the unescaped string
|
5379 |
|
|
#
|
5380 |
|
|
sub un_esc_str {
    my ($text) = @_;

    # every backslash followed by two lowercase hex digits becomes the
    # character with that code; anything else is left as it is
    $text =~ s/\\([0-9a-f][0-9a-f])/chr (hex ($1))/ge;

    return $text;
}
|
5387 |
|
|
|
5388 |
|
|
|
5389 |
|
|
sub syslog_die {
    my ($text) = @_;

    # record the reason at "err" priority first, then abort with the
    # same text; the trailing newline keeps perl from appending the
    # file and line
    syslog ("err", $text);

    die $text . "\n";
}
|
5395 |
|
|
|
5396 |
|
|
no warnings; # Redefining syslog
|
5397 |
|
|
sub syslog {
    #
    # Wrapper around Sys::Syslog::syslog that can never kill the daemon.
    #
    # Callers pass fully interpolated messages, so every "%" is removed
    # to keep the real syslog from reading it as a format directive.
    # The stripping is done on copies: map returns the cleaned strings
    # (the value of s/// is only a count) and the caller's arguments,
    # which @_ aliases, stay untouched.
    #
    my @log = map {
        my $part = defined $_ ? $_ : "";
        $part =~ s/%//g;
        $part;
    } @_;

    # any failure inside Sys::Syslog is swallowed on purpose
    eval {
        local $SIG{"__DIE__"} = sub { };
        Sys::Syslog::syslog (@log);
    }
}
|
5404 |
|
|
use warnings;
|
5405 |
|
|
|
5406 |
|
|
#
|
5407 |
|
|
# Have a "conversation" with a PAM authentication module. This fools the
|
5408 |
|
|
# PAM module into authenticating us non-interactively.
|
5409 |
|
|
#
|
5410 |
|
|
sub pam_conv_func {
    #
    # PAM conversation callback.  The arguments are (code, message)
    # pairs; the result holds a (PAM_SUCCESS, answer) pair for each of
    # them followed by one final PAM_SUCCESS.
    #
    my @prompts = @_;
    my @res;

    while (@prompts) {
        my ($code, $msg) = splice (@prompts, 0, 2);

        # the hidden prompt gets the password, the echoed prompt the
        # user name, every other message an empty answer
        my $ans;
        if ($code == Authen::PAM::PAM_PROMPT_ECHO_OFF ()) {
            $ans = $PAM_password;
        } elsif ($code == Authen::PAM::PAM_PROMPT_ECHO_ON ()) {
            $ans = $PAM_username;
        } else {
            $ans = "";
        }

        push @res, Authen::PAM::PAM_SUCCESS (), $ans;
    }

    push @res, Authen::PAM::PAM_SUCCESS ();

    return @res;
}
|
5426 |
|
|
|
5427 |
|
|
|
5428 |
|
|
sub write_dtlog
{
    my ($sref, $group, $service) = @_;

    #
    # Append one record to the downtime log:
    #
    #   now group service first_failure downtime interval summary
    #
    # $CF{DTLOGGING} is set to 1 when the log could be opened and to 0
    # when it could not.
    #
    my $tmnow = time;

    # a first failure time of 0 is replaced by the daemon's start time
    $sref->{"_first_failure"} = $START_TIME
        if ($sref->{"_first_failure"} == 0);

    # 3-arg open: the configured name is used verbatim as a file name
    my $dtlog;
    if (!open ($dtlog, '>>', $CF{"DTLOGFILE"})) {
        syslog ('err', "could not append to $CF{DTLOGFILE}: $!");
        $CF{"DTLOGGING"} = 0;
    } else {
        $CF{"DTLOGGING"} = 1;

        my $record = join ("",
            $tmnow,
            " $group",
            " $service",
            " ", 0 + $sref->{"_first_failure"},
            " ", 0 + $tmnow - $sref->{"_first_failure"},
            " ", 0 + $sref->{'interval'},
            " $sref->{'_last_summary'}\n");

        #
        # Buffered output may only fail when it is flushed at close,
        # so both results are checked; the error is reported once.
        #
        my $error;
        $error = "$!" unless (print {$dtlog} $record);
        if (!close ($dtlog) && !defined $error) {
            $error = "$!";
        }

        syslog ('err', "error writing to $CF{DTLOGFILE}: $error")
            if (defined $error);
    }
}
|
5457 |
|
|
|
5458 |
|
|
# Perl's "system" function blocks. We don't want the mon process to
|
5459 |
|
|
# ever block. So we fork then call system. Mon will handle the
|
5460 |
|
|
# child process cleanup elsewhere.
|
5461 |
|
|
sub mysystem {
    my @args = @_;

    #
    # Run a command without blocking the caller: fork, let the child
    # call system() and exit, and return at once in the parent.  The
    # child is not waited for here.
    #
    print STDERR "mysystem called: @args\n";

    my $pid = fork ();

    if (!defined $pid) {
        #
        # fork failed, we are still the parent: say what could not be
        # run and why, on STDERR and in syslog
        #
        print STDERR "mysystem: could not fork to run [@args]: $!\n";
        syslog ('err', "mysystem: could not fork to run [@args]: $!");
    } elsif ($pid == 0) {
        # child
        system (@args);
        exit (0);
    } else {
        # parent
        return;
    }

    print STDERR "mysystem returning\n";
}
|