Project

General

Profile

Download (16.1 KB) Statistics
| Branch: | Revision:
1
#!/usr/bin/perl
2

    
3
# All rights reserved and Copyright (c) 2020 Origo Systems ApS.
4
# This file is provided with no warranty, and is subject to the terms and conditions defined in the license file LICENSE.md.
5
# The license file is part of this source code package and its content is also available at:
6
# https://www.origo.io/info/stabiledocs/licensing/stabile-open-source-license
7

    
8
# mail.alert - Mail alert for mon
9
#
10
# The first line from STDIN is summary information, adequate to send
11
# to a pager or email subject line.
12
#
13
# -f from@addr.x   set the smtp envelope "from" address
14
#
15
use Getopt::Std;
16
use Text::Wrap;
17
use Data::Dumper;
18
use Tie::DBI;
19
use ConfigReader::Simple;
20
use String::Escape qw( unbackslash backslash );
21
use MIME::Lite;
22
use URI::Escape;
23
use HTTP::Async;
24
use HTTP::Request::Common;
25
use Proc::ProcessTable;
26
use utf8;
27

    
28
# ---------------------------------------------------------------------------
# Main script, part 1: parse options, derive timestamps, load saved monitor
# state and append one line to the per-monitor monthly log file.
# ---------------------------------------------------------------------------

# Use a fixed PATH and drop shell-related variables before running commands.
$ENV{PATH} = '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Getopt::Std stores each option in a global $opt_X:
#   -S summary override   -s service        -g group (server uuid)
#   -h host/IP address    -t alert time     -l seconds until next alert
#   -f envelope from      -u this is an UP alert
getopts ("S:s:g:h:t:l:f:u");

# The first line on STDIN is the summary; -S overrides it.
$summary = <STDIN> if (!-t STDIN);
chomp $summary if (defined $summary);
$summary = $opt_S if (defined $opt_S);

# Remaining command line arguments are the mail recipients.
$mailaddrs = join (',', @ARGV);
$mailfrom = "-f $opt_f -F $opt_f" if (defined $opt_f);

my $config = ConfigReader::Simple->new("/etc/stabile/config.cfg",
    [qw(DBI_USER DBI_PASSWD)]);

$dbiuser =  $config->get('DBI_USER') || "irigo";
$dbipasswd = $config->get('DBI_PASSWD') || "";

$oponduty = 'operator@sa.origo.io';

# $t is the human readable alert time; it is used in the mail bodies later on.
$t = localtime($opt_t);
# $wday and $mon set here are overwritten by the list-context localtime below.
($wday,$mon,$day,$tm) = split (/\s+/, $t);

($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($opt_t);
$year += 1900;
$month = substr("0" . ($mon+1), -2);
$pretty_time = sprintf "%4d-%02d-%02d@%02d:%02d:%02d",$year,$mon+1,$mday,$hour,$min,$sec;

# One log file per month, group and service, owned by mon:mon.
# The file is created and chowned from Perl instead of through a shell, so the
# option values are never interpreted as shell syntax.
my $logfile = "/var/log/stabile/$year-$month:$opt_g:$opt_s";
unless (-e $logfile) {
    if (open(my $newlog, '>>', $logfile)) {
        close($newlog);
        my $mon_uid = getpwnam('mon');
        my $mon_gid = getgrnam('mon');
        chown($mon_uid, $mon_gid, $logfile) if (defined $mon_uid && defined $mon_gid);
    } else {
        warn "Unable to create log file '$logfile': $!\n";
    }
}

%opstatus = getSavedOpstatus();
my $ds = doSendAlert($opt_g, $opt_s); # Determine if DOWN alerts have already been sent
my $logline;
my $estatus = $opstatus{"$opt_g:$opt_s"}->{'opstatus'};
my $existing_monitor = (defined $estatus);

#$ALERT = $opt_u ? "UP" : ($summary ? "DOWN" : "STARTUP");
$ALERT = "DOWN";
$ALERT = "UP" if ($opt_u);
$ALERT = "STARTUP" if ($ENV{"MON_ALERTTYPE"} eq 'startup');

# Build a lookup of disabled entries from "moncmd list disabled".
# The second and fourth words of each line form the key "second[:fourth]".
my %disabled;
my @dislist = split(/\n/, `/usr/bin/moncmd list disabled`);
foreach my $disline (@dislist) {
    my (undef, $dis_group, undef, $dis_service) = split(' ', $disline);
    $disabled{"$dis_group" . ($dis_service ? ":$dis_service" : '')} = 1;
}

if ($disabled{"$opt_g:$opt_s"}) {
    $logline = "$opt_t, DISABLE, RESTART, $pretty_time";

} elsif ($ALERT eq 'STARTUP' && $existing_monitor) { # Not a new service, just a restart
    my $EALERT = ($estatus)?"UP":"DOWN";
    $logline = "$opt_t, $EALERT, RESTART, $pretty_time";

} else {
    $logline = "$opt_t, $ALERT, $summary, $pretty_time";
}

# Append the line from Perl. The summary comes from monitor output, so it must
# not pass through a shell, where quotes, backticks or $(...) would be interpreted.
if ($opt_t) {
    if (open(my $logfh, '>>', $logfile)) {
        print $logfh "$logline\n";
        close($logfh);
    } else {
        warn "Unable to append to log file '$logfile': $!\n";
    }
}
92
# `/bin/echo "$logline :: $ALERT, $ds, $ENV{'MON_LAST_OPSTATUS'}, $existing_monitor, $opstatus{"$group:$service"}->{'alerts_sent'}, $opstatus{"$group:$service"}->{'ack'}" >> /tmp/monlog`;
93

    
94
#
95
# The remaining lines normally contain more detailed information,
96
# but this is monitor-dependent.
97
#
98
# Everything left on STDIN after the summary line is monitor-specific detail
# text. It is collected verbatim; $details stays undefined when nothing is read.
my $details;
unless (-t STDIN) {
    $details .= $_ while (<STDIN>);
}
chomp $details;
105
#`/bin/echo >> /var/log/stabile/test.log "$ALERT, $ds ($opt_g, $opt_s): $ENV{'MON_LAST_OPSTATUS'}: $existing_monitor: $mailaddrs, $details"` if ($opt_s =~ /http/);
106
# ---------------------------------------------------------------------------
# Main script, part 2: notify the UI and send the alert mail.
# A mail is sent for DOWN alerts that doSendAlert() allowed, and for UP alerts
# when the previous opstatus was '0' or the monitor has no saved state.
# ---------------------------------------------------------------------------
if (   ($ALERT eq 'DOWN' && $ds)
    || ($ALERT eq 'UP' && $ENV{'MON_LAST_OPSTATUS'} eq '0')
    || ($ALERT eq 'UP' && !$existing_monitor)
    ) {

    my ($user, $servername) = updateClientUI();
    $servername = $opt_g unless ($servername);
    # updateClientUI() returns a single error string on failure; never present
    # such a string as the owner of the server.
    $user = "mon" if ($user =~ /^Error (?:getting user|connecting to DB)/);
    my $exclamation = ($ALERT eq 'UP')?'RECOVERY:':'ALERT:';

    # Return the text after "KEY=" for every matching line of the engine config,
    # without the final newline (same result as: sed -n -e 's/^KEY=//p').
    my $config_value = sub {
        my ($key) = @_;
        open(my $cfgfh, '<', '/etc/stabile/config.cfg') or return '';
        my $found = join('', map { /^\Q$key\E=(.*)$/s ? $1 : () } <$cfgfh>);
        close($cfgfh);
        chomp $found;
        return $found;
    };

    my $engineid = $config_value->('ENGINEID');
    my $enginename = $config_value->('ENGINENAME');
    my $doxmpp = $config_value->('DO_XMPP');

    my $enginelink = '';
    if (open(my $urlfh, '<', '/etc/stabile/baseurl')) {
        local $/;
        $enginelink = <$urlfh>;
        $enginelink = '' unless (defined $enginelink);
        close($urlfh);
        chomp $enginelink;
    }

    # Ask stash for the system this server belongs to. $opt_g is only put on the
    # shell command line when it consists of harmless characters.
    my $sysuuid = '';
    if (defined $opt_g && $opt_g =~ /\A[\w.:-]+\z/) {
        $sysuuid = `echo servers/$opt_g/list | stash | sed -n -e 's/.*"system" : "//p'| sed -n -e 's/",//p'`;
        chomp $sysuuid;
    }
    $sysuuid = $opt_g unless (length $sysuuid == 36);

    if ($mailaddrs
    # fix for bug in https monitor, which sends out wrong up alerts
    # &&    !($details =~ /Bad response code/ && $opt_s eq 'https' && $ENV{'MON_ALERTTYPE'} eq 'up')
    ) {

        my $first_failure = localtime($ENV{'MON_FIRST_FAILURE'});
        my $subject = "$exclamation $servername:$opt_s is $ALERT ($pretty_time)";
        my $xmpptext = "$subject\n";
        $xmpptext .= "IP address: $opt_h\n";
        $xmpptext .= "Link to engine: $enginelink" if ($enginelink);

        my $mailtext = <<EOF;
Server UUID: $opt_g
System UUID: $sysuuid
Server name: $servername
Server user: $user
Service: $opt_s
Time noticed: $t
Down since: $first_failure
Last status: $ENV{'MON_LAST_OPSTATUS'}
Alert type: $ENV{'MON_ALERTTYPE'}
EOF

        $mailtext .= "Secs until next alert: $opt_l\n" if ($opt_l);
        $mailtext .= "IP address: $opt_h\n";
        $mailtext .= "Link to engine: $enginelink\n" if ($enginelink);
        $mailtext .= "Detailed text:\n$details" if ($details);

        # Server names, user names and option values are not under our control,
        # so escape them before placing them in the HTML part.
        my $html_escape = sub {
            my ($text) = @_;
            $text = '' unless (defined $text);
            $text =~ s/&/&amp;/g;
            $text =~ s/</&lt;/g;
            $text =~ s/>/&gt;/g;
            $text =~ s/"/&quot;/g;
            return $text;
        };
        my $h_title      = $html_escape->("$exclamation $servername:$opt_s is $ALERT");
        my $h_enginename = $html_escape->($enginename);
        my $h_service    = $html_escape->($opt_s);
        my $h_servername = $html_escape->($servername);
        my $h_ip         = $html_escape->($opt_h);
        my $h_group      = $html_escape->($opt_g);
        my $h_user       = $html_escape->($user);
        my $h_link       = $html_escape->($enginelink);

        # The meta charset matches the UTF-8 charset declared on the MIME part.
        my $mailhtml = <<END;
<!DOCTYPE html
	PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
	 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
	<head>
		<title>$h_title</title>
		<meta http-equiv="Pragma" content="no-cache" />
		<link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css" />
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
	</head>
	<body class="tundra">
		<div>
			<div class="well" style="margin:20px;">
				<h3 style="color: #e74c3c!important; margin-bottom:30px;">Oops, something is wrong with one of your services!</h3>
				<div>Monitoring services at <strong>$h_enginename</strong> noticed that the <strong>$h_service</strong> service on the server <strong>$h_servername</strong> stopped responding at <strong>$first_failure</strong>.</div>
				<br>
				<div>The server has IP address <strong>$h_ip</strong>, id <strong>$h_group</strong> and is owned by <strong>$h_user</strong>.</div>
				<br>
				<div>
					To access the server and hopefully fix the problems, click <a href="$h_link">here</a>.<br>
				</div>
				<br>
				<div>Thanks,<br>your friendly monitoring daemon</div>
			</div>
		</div>
	</body>
</html>
END

        my $recoveryhtml = <<END;
<!DOCTYPE html
	PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
	 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
	<head>
		<title>$h_title</title>
		<meta http-equiv="Pragma" content="no-cache" />
		<link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css" />
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
	</head>
	<body class="tundra">
		<div>
			<div class="well" style="margin:20px;">
				<h3 style="color: #27ae60!important; margin-bottom:30px;">Good news! one of your services resumed normal operations</h3>
				<div>Monitoring services at <strong>$h_enginename</strong> noticed that the <strong>$h_service</strong> service on the server <strong>$h_servername</strong> resumed responding at <strong>$t</strong>.</div>
				<br>
				<div>The server has IP address <strong>$h_ip</strong>, id <strong>$h_group</strong> and is owned by <strong>$h_user</strong>.</div>
				<br>
				<div>Thanks,<br>your friendly monitoring daemon</div>
			</div>
		</div>
	</body>
</html>
END

        # multipart/alternative: a plain text part followed by an HTML part.
        my $msg = MIME::Lite->new(
            From     => 'monitoring',
            To       => $mailaddrs,
            Type     => 'multipart/alternative',
            Subject  => $subject,
        );
        $msg->add("sysuuid" => $sysuuid);

        my $att_text = MIME::Lite->new(
            Type     => 'text',
            Data     => $mailtext,
            Encoding => 'quoted-printable',
        );
        $att_text->attr('content-type'
            => 'text/plain; charset=UTF-8');
        $msg->attach($att_text);

        my $att_html = MIME::Lite->new(
            Type     => 'text',
            Data     => (($ALERT eq 'UP')?$recoveryhtml:$mailhtml),
            Encoding => 'quoted-printable',
        );
        $att_html->attr('content-type'
            => 'text/html; charset=UTF-8');
        $msg->attach($att_html);

        logit($user, $subject);
        # Record delivery problems instead of silently ignoring them.
        my $sent = eval { $msg->send };
        unless ($sent) {
            my $reason = $@ || 'send returned a false value';
            chomp $reason;
            warn "Failed to send alert mail to $mailaddrs: $reason\n";
            logit($user, "Failed to send alert mail to $mailaddrs: $reason");
        }

        if (0 && $doxmpp) { # XMPP disabled for now
            my $basedir = '/var/www/stabile';
            $basedir = `cat /etc/stabile/basedir` if (-e "/etc/stabile/basedir");
            chomp $basedir;
            $ENV{REMOTE_USER} = $user;
            require "$basedir/cgi/Stabile.pm";
            foreach my $to (split /, */, $mailaddrs) {
                my $xres = xmppSend($to, $xmpptext, $engineid, $sysuuid);
            }

            # Send alerts to Origo operators on duty
            $msg->replace('to', $oponduty);
            $msg->replace('subject', "[OPERATOR] $subject");
            $msg->send;
            my $xres = xmppSend($oponduty, "[OPERATOR]\n$xmpptext", $engineid, $sysuuid);
        }
    }
}
255

    
256
# updateClientUI()
#
# Looks up the server given by -g ($opt_g) in the "domains" table and pushes a
# JSON "update" task describing the current alert to the owning user's UI.
#
# The task is written to every /tmp/<user>~*.tasks file that appears in the
# command line of a running process. When there is none, the task is appended
# to /tmp/<user>.tasks and the ui_update processes are sent a HUP.
#
# Reads the globals $opt_g, $opt_s, $opt_t, $ALERT, $dbiuser, $dbipasswd and
# several MON_* environment variables.
#
# Returns ($user, $servername) on success. On failure a single string is
# returned: "Error connecting to DB" or "Error getting user for <uuid>".
sub updateClientUI {
    unless (tie %domreg,'Tie::DBI', {
        db=>'mysql:steamregister',
        table=>'domains',
        key=>'uuid',
        autocommit=>0,
        CLOBBER=>3,
        user=>$dbiuser,
        password=>$dbipasswd}) {return "Error connecting to DB"};

    my $serv = $domreg{$opt_g};
    my $user = $serv->{'user'};
    my $servername = $serv->{'name'};
    untie %domreg;

    return "Error getting user for $opt_g" unless ($user);

    # Escape a value for use inside a double-quoted JSON string.
    my $json_str = sub {
        my ($value) = @_;
        $value = '' unless (defined $value);
        $value =~ s/(["\\])/\\$1/g;
        $value =~ s/\n/\\n/g;
        $value =~ s/\r/\\r/g;
        $value =~ s/\t/\\t/g;
        return $value;
    };
    # Quote a value as one single-quoted shell word.
    my $shell_quote = sub {
        my ($word) = @_;
        $word =~ s/'/'\\''/g;
        return "'$word'";
    };

    my $lastcheck = ($ENV{'MON_LAST_SUCCESS'}>$ENV{'MON_LAST_FAILURE'})?$ENV{'MON_LAST_SUCCESS'}:$ENV{'MON_LAST_FAILURE'};
    my $det = qq|Please refresh to see details about this monitors last check.|;
    my $stat = lc($ALERT);
    $stat = 'ok' if ($stat eq 'up');

    my $newtasks = qq|{"type":"update","tab":"monitors","timestamp":$opt_t,"sender":"alert"| .
        ($opt_g ? ",\"uuid\":\"" . $json_str->($opt_g) . "\"" : "") .
        (($opt_g && $opt_s) ? ",\"id\":\"" . $json_str->("$opt_g:$opt_s") . "\"" : "") .
        ($ALERT ? ",\"status\":\"$stat\"" : "") .
        ",\"last_check\":\"" . $json_str->($lastcheck) . "\"" .
        ",\"last_detail\":\"$det\"" .
        (($stat eq 'ok') ? ",\"ack\":0" : "") .
        ",\"ackcomment\":\"" . $json_str->($ENV{'MON_ACK'}) . "\"" .
        ",\"opstatus\":\"" . $json_str->($ENV{'MON_OPSTATUS'}) . "\"" .
        ",\"first_failure\":\"" . $json_str->($ENV{'MON_FIRST_FAILURE'}) . "\"" .
        "}, ";

    $newtasks = $1 if ($newtasks =~ /(.+)/); #untaint

    # Find this user's session task files.
    opendir my($dh), '/tmp' or die "Couldn't open '/tmp': $!";
    my @files = grep { /^\Q$user\E~.*\.tasks$/ } readdir $dh;
    closedir $dh;

    # Debug trace, written only after @files is known so the count is real.
    my $fi = scalar @files;
    if (open(my $monlog, '>>', '/tmp/monlog')) {
        print $monlog "$opt_t UI updating the $fi $ENV{'MON_OPSTATUS'} monitors for $user\n";
        close($monlog);
    }

    my $proctable = Proc::ProcessTable->new;
    my @ptable = @{$proctable->table};
    my @pfiles;
    foreach my $f (@files) {
        foreach my $p ( @ptable ){
            my $pcmd = $p->cmndline;
            if ($pcmd =~ /\/tmp\/\Q$f\E/) { # Only include pipes with active listeners
                push @pfiles, "/tmp/$f";
                last;
            }
        }
    }

    if (@pfiles) {
        # Write to users named pipes if user is logged in and session file found.
        # File names come from /tmp, so each one is quoted for the shell.
        my $quoted_tasks = $shell_quote->($newtasks);
        my $quoted_files = join(' ', map { $shell_quote->($_) } @pfiles);
        $quoted_files = $1 if ($quoted_files =~ /(.+)/); #untaint
        my $res = `/bin/echo $quoted_tasks | /usr/bin/tee $quoted_files \&`;
    } else {
        # If session file not found, append to orphan tasks file wait a sec and reload
        my $orphan = "/tmp/$user.tasks";
        if (open(my $taskfh, '>>', $orphan)) {
            print $taskfh "$newtasks\n";
            close($taskfh);
            chmod(0666, $orphan);
        }
        sleep 1;
        eval { system('/usr/bin/pkill', '-HUP', '-f', 'ui_update'); 1; } or do {;};
    }
    return ($user, $servername);
}
328

    
329
# getSavedOpstatus([$dounbackslash])
#
# Reads /etc/stabile/opstatus and returns a hash keyed by "<group>:<service>".
# Each value is a hash ref holding the key=value pairs found on one line of the
# file, plus the raw line itself under the key 'line'.
#
# When $dounbackslash is true, every value except 'last_result' has each
# backslash turned into "\x" and is then passed through unbackslash().
#
# Returns an empty hash when the file does not exist. Dies when the file exists
# but cannot be opened.
sub getSavedOpstatus {
    my $dounbackslash = shift;
    my $opfile = "/etc/stabile/opstatus";
    my %opstatus;
    # Build hash (%opstatus) with opstatus'es etc. to use for services that are in state unknown because of mon reload
    return %opstatus unless (-e $opfile);

    open(my $fh, '<', $opfile) or die "Unable to access opstatus file '$opfile': $!\n";
    my @oparray = <$fh>;
    close($fh);

    foreach my $line (@oparray) {
        my %h;
        # NOTE(review): lines are not chomped, so the last value on a line keeps
        # its trailing newline, and values are cut at a second '='. Left as is
        # because the file format is not visible here - confirm with the writer.
        foreach my $pair (split(/ /, $line)) {
            my ($key, $val) = split(/=/, $pair);
            if ($key eq 'last_result' || !$dounbackslash) {
                $h{$key} = $val;
            } else {
                $val =~ s/\\/\\x/g;
                $h{$key} = unbackslash($val);
            }
        }
        $h{'line'} = $line;
        $opstatus{"$h{'group'}:$h{'service'}"} = \%h;
    }
    return %opstatus;
}
357

    
358
# doSendAlert($group, $service)
#
# Decides whether a DOWN alert should be sent for the given monitor, based on
# the saved state in the global %opstatus.
#
# Returns 0 only when a saved entry exists, its 'opstatus' is the string '0'
# and its 'ack' value is true. Returns 1 in every other case.
sub doSendAlert {
    my ($group, $service) = @_;
    my $saved = $opstatus{"$group:$service"};

    return 1 unless ($saved);
    return 1 unless ($saved->{'opstatus'} eq '0');
    return 1 unless ($saved->{'ack'});

    return 0; # This service is already down and alerts have been sent
}
368

    
369
# logit($loguser, $msg)
#
# Appends "<time> : <user> : <msg>" to /var/log/stabile/steam.log, where <time>
# is the current local time formatted as YYYY-MM-DD@HH:MM:SS.
#
#   $loguser  user name for the line; defaults to 'irigo' when false
#   $msg      text to log; nothing is written when it is false
#
# When the log file cannot be opened, a note is appended to the global
# $posterror and the function returns without writing.
sub logit {
    my ($loguser, $msg) = @_;
    $loguser = $loguser || 'irigo';
    my $logfile = "/var/log/stabile/steam.log";

    return unless ($msg && $msg ne '');

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    my $pretty_time = sprintf "%4d-%02d-%02d@%02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec;

    # NOTE(review): the message is decoded to characters but the handle has no
    # encoding layer; kept as it was - confirm the encoding expected in steam.log.
    utf8::decode($msg);

    my $logfh;
    unless (open($logfh, '>>', $logfile)) {
        $posterror .= "Status=Error log file '$logfile' could not be written";
        return; # do not print to a handle that was never opened
    }
    print $logfh $pretty_time, " : $loguser : $msg\n";
    close($logfh);
    return;
}
385

    
386
# xmppSend($to, $msg, $engineid, $sysuuid)
#
# Asks the Origo service to deliver $msg as an XMPP message to $to, by posting
# the data with curl in the background.
#
#   $to        recipient address
#   $msg       message text
#   $engineid  engine id; read from /etc/stabile/config.cfg when false
#   $sysuuid   system uuid passed along with the request
#
# Nothing is sent unless DO_XMPP is set to a true value in the config file.
# A message is only posted when $to equals the global $oponduty, or when the
# domain of $to has an _xmpp-server._tcp SRV record according to "host".
#
# Returns a status string starting with "OK:", "INFO:" or "ERROR:".
sub xmppSend {
    my ($to, $msg, $engineid, $sysuuid) = @_;
    unless ($engineid) {
        $engineid = `cat /etc/stabile/config.cfg | sed -n -e 's/^ENGINEID=//p'`;
        chomp $engineid; # keep the newline out of the POST data
    }
    my $doxmpp = `cat /etc/stabile/config.cfg | sed -n -e 's/^DO_XMPP=//p'`;
    chomp $doxmpp; # without this "0\n" would count as enabled

    if (!$doxmpp) {
        return "INFO: DO_XMPP not enabled in config\n";

    } elsif ($to && $msg) {
        # Only accept a plain host name as domain, since it is handed to a shell.
        my $xdom;
        $xdom = $1 if ($to =~ /\@([A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+)\z/);

        if ($to eq $oponduty || ($xdom && `host -t SRV _xmpp-server._tcp.$xdom` !~ /NXDOMAIN/)) {
            my $tktcfg = ConfigReader::Simple->new("/etc/apache2/conf-available/auth_tkt_cgi.conf", [qw(TKTAuthSecret)]);
            my $tktkey = $tktcfg->get('TKTAuthSecret') || '';
            # sha512_hex is not imported anywhere in this file; load it here.
            require Digest::SHA;
            my $tkthash = Digest::SHA::sha512_hex($tktkey);

            # Every field is URI-escaped, which makes the data valid form data
            # and leaves no characters that are special inside shell quotes.
            my $data = join('&',
                "engineid=" . uri_escape($engineid),
                "enginetkthash=$tkthash",
                "to=" . uri_escape($to),
                "sysuuid=" . uri_escape(defined $sysuuid ? $sysuuid : ''),
                "msg=" . uri_escape($msg),
            );
            my $posturl = "https://www.origo.io/irigo/engine.cgi?action=xmppsend";
            # NOTE(review): -k turns off TLS certificate verification - confirm
            # that this is still needed.
            my $ret = system(qq|/usr/bin/curl -k --data "$data" '$posturl' \&|);

            return "OK: Sent xmpp message to $to $ret\n";
        } else {
            return "INFO: srv records not found for $xdom\n";
        }

    } else {
        return "ERROR: Invalid xmpp data $to, $msg\n";
    }
};
(24-24/27)