version up eggs.
[slapos.git] / stack / monitor / monitor.py.in
1 #!{{ python_executable }}
2
3 import datetime
4 import json
5 import os
6 import subprocess
7 import sys
8 import sqlite3
9 import time
10 import threading
11 from optparse import OptionParser, make_option
12
13
# Status labels written to the database for each check and for a whole run.
FAILURE, SUCCESS = "FAILURE", "SUCCESS"

# The values below are template placeholders ("{{ ... }}"); presumably they
# are replaced with real paths when this file is rendered -- confirm against
# the code that instantiates the template.

# SQLite database file opened by init_db() and writeFiles().
db_path = "{{ monitor_parameter['db-path'] }}"
# Working directory given to every script started by runScripts().
instance_path = "{{ directory['home'] }}"
# Directory scanned for custom monitoring scripts (-m / -a).
monitor_dir = "{{ directory['monitor-custom-scripts'] }}"
# Directory scanned for service pid files (-s / -a).
pid_dir = "{{ directory['run'] }}"
# Directory scanned for promise scripts (-p / -a).
promise_dir = "{{ directory['promise'] }}"

# JSON file that writeFiles() overwrites with the latest results.
monitoring_file_json = "{{ monitoring_file_json }}"
24
def _flag(short_name, long_name, dest, help_text):
    """Build one boolean (store_true) command-line switch."""
    return make_option(short_name, long_name,
                       dest=dest, action="store_true", help=help_text)


# Command-line switches understood by main(); all of them are plain flags.
option_list = [
    _flag("-a", "--all", "all",
          "test everything : promises, services, customs"),
    _flag("-n", "--no-write", "only_stdout",
          "just show the json output on stdout"),
    _flag("-m", "--monitors", "monitor",
          "add the custom monitoring file to the files to monitor"),
    _flag("-p", "--promises", "promise",
          "add the promises'file to the files to monitor"),
    _flag("-s", "--services", "service",
          "add the file containing services'pid to the files to monitor"),
]
37
class Popen(subprocess.Popen):
    """A ``subprocess.Popen`` whose child can be stopped after a deadline.

    ``timeout()`` arms a timer that calls ``stop()``; ``stop()`` calls
    ``terminate()`` and escalates to ``kill()`` when the child is still
    alive after a grace period.
    """

    # Pending threading.Timer armed by timeout(); None when nothing is armed.
    __timeout = None

    def timeout(self, delay, delay_before_kill=5):
        """Arm (or re-arm) a timer that stops the child after ``delay`` seconds.

        ``delay_before_kill`` is forwarded to ``stop()``.  A helper thread
        waits for the child and cancels the timer once the child has exited,
        so a finished process is not signalled later on.
        """
        if self.__timeout is not None:
            self.__timeout.cancel()
        timer = threading.Timer(delay, self.stop, [delay_before_kill])
        self.__timeout = timer
        timer.start()

        def waiter():
            self.wait()
            # Cancel the timer armed by *this* call rather than re-reading
            # the attribute, which another timeout() call may have replaced.
            timer.cancel()

        threading.Thread(target=waiter).start()

    def stop(self, delay_before_kill=5):
        """Terminate the child; kill it after ``delay_before_kill`` seconds.

        Returns the child's exit status.  Calling this on a child that has
        already been reaped is a no-op that returns the known status.
        """
        if self.__timeout is not None:
            self.__timeout.cancel()
        if self.returncode is not None:
            # Already reaped: signalling now could fail, or reach an
            # unrelated process that reuses the pid.
            return self.returncode

        def signal_quietly(send):
            # The child may exit between our check and the signal.
            try:
                send()
            except OSError:
                pass

        signal_quietly(self.terminate)
        killer = threading.Timer(delay_before_kill, signal_quietly,
                                 [self.kill])
        killer.start()
        try:
            return self.wait()
        finally:
            killer.cancel()
58
def init_db():
    """Create the monitoring tables in the database at ``db_path`` if missing.

    ``status`` holds one row per run (``timestamp`` is UNIQUE) and
    ``individual_status`` one row per checked element.  Safe to call
    repeatedly thanks to ``IF NOT EXISTS``.
    """
    db = sqlite3.connect(db_path)
    try:
        db.executescript("""
            CREATE TABLE IF NOT EXISTS status (
              timestamp INTEGER UNIQUE,
              status VARCHAR(255));
            CREATE TABLE IF NOT EXISTS individual_status (
              timestamp INTEGER,
              status VARCHAR(255),
              element VARCHAR(255),
              output TEXT);
        """)
        db.commit()
    finally:
        # Release the connection even when the schema creation fails.
        db.close()
74
def getListOfScripts(directory):
    """
    Return the paths of the executable files directly inside ``directory``
    (not recursive), sorted by file name so runs are reproducible.

    Sub-directories and files without execute permission are skipped.
    Exits the program (SystemExit) when ``directory`` is not an existing
    directory.
    """
    if not os.path.isdir(directory):
        # sys.exit rather than the ``exit`` helper, which only exists when
        # the ``site`` module has been loaded.
        sys.exit("There is a problem in your directories "
                 "of monitoring. Please check them")
    scripts = []
    for file_name in sorted(os.listdir(directory)):
        path = os.path.join(directory, file_name)
        if os.access(path, os.X_OK) and not os.path.isdir(path):
            scripts.append(path)
    return scripts
89
90
def runServices(directory):
    """Check that the processes named by the pid files in ``directory`` exist.

    Returns a dict mapping each pid file's base name to '' when the process
    exists, or to an error message when it does not.  Files that cannot be
    read or do not contain a positive integer are ignored.

    NOTE(review): the candidates come from getListOfScripts(), which only
    returns files with execute permission -- confirm that pid files are
    expected to be executable.
    """
    import errno  # only needed here; the module header does not import it

    result = {}
    # getListOfScripts() already returns paths joined with ``directory``.
    for service_path in getListOfScripts(directory):
        service_name = os.path.basename(service_path)
        try:
            with open(service_path) as pid_file:
                pid = int(pid_file.read())
        ### because apache (or others) can write sockets
        ### We also ignore not readable pid files
        except (IOError, ValueError):
            continue
        if pid <= 0:
            # 0 and negative values address process groups in kill(),
            # not a single process, so they cannot be a service pid.
            continue
        try:
            # Signal 0 performs the existence/permission check only.
            os.kill(pid, 0)
        except OSError as error:
            if error.errno == errno.EPERM:
                # The process exists but belongs to another user.
                result[service_name] = ''
            else:
                result[service_name] = "This service is not running anymore"
        else:
            result[service_name] = ''
    return result
109
110
def runScripts(directory, script_timeout=60):
    """Run every executable file of ``directory`` and collect the failures.

    Each script runs with ``instance_path`` as working directory, an empty
    environment (the inherited one on cygwin) and a closed stdin.

    directory      -- directory scanned by getListOfScripts().
    script_timeout -- seconds a script may run before it is stopped.

    Returns a dict mapping each script's base name to '' on success or to
    an error message: the script's stderr, "Time Out", or a fallback text
    when the script failed without writing to stderr or could not start.
    """
    result = {}
    # getListOfScripts() already returns paths joined with ``directory``.
    for script_path in getListOfScripts(directory):
        script = os.path.basename(script_path)
        result[script] = ''

        try:
            process_handler = Popen([script_path],
                                    cwd=instance_path,
                                    env=None if sys.platform == 'cygwin' else {},
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    stdin=subprocess.PIPE)
        except OSError as error:
            # One broken script must not prevent the others from running.
            result[script] = "Could not be started: %s" % error
            continue
        process_handler.stdin.flush()
        process_handler.stdin.close()
        process_handler.stdin = None

        timed_out = []

        # Defaults bind this iteration's objects (no late-binding surprises).
        def on_timeout(handler=process_handler, flag=timed_out):
            if handler.returncode is None:
                flag.append(True)
                handler.stop()

        watchdog = threading.Timer(script_timeout, on_timeout)
        watchdog.start()
        try:
            # communicate() drains both pipes while waiting; a bare wait()
            # can block forever once the script fills a pipe buffer.
            stderr = process_handler.communicate()[1]
        finally:
            watchdog.cancel()

        if timed_out:
            result[script] = "Time Out"
        elif process_handler.returncode != 0:
            message = stderr.strip() if stderr is not None else ''
            # An empty message would be recorded as a success by the
            # callers, so a silent failure gets an explicit text.
            result[script] = message or (
                "Exited with status %s" % process_handler.returncode)
    return result
142
143
def writeFiles(monitors):
    """Store the results in the database and in the JSON status file.

    monitors -- dict mapping an element name to its output; an empty
                string means the element succeeded, anything else is
                recorded as a failure.

    One ``individual_status`` row is inserted per element and one
    ``status`` row for the whole run, all with the same timestamp and in a
    single transaction.  ``monitors`` then gets a 'datetime' entry (the
    caller's dict is modified) and is dumped to ``monitoring_file_json``.
    """
    timestamp = int(time.time())
    date = datetime.datetime.now().ctime()
    init_db()
    db = sqlite3.connect(db_path)
    try:
        fail = False
        # ``with db`` commits on success and rolls back on error, so a
        # failing insert never leaves a partially recorded run behind.
        with db:
            for key, value in monitors.items():
                element_status = SUCCESS
                if value != "":
                    fail = True
                    element_status = FAILURE
                db.execute(
                    "insert into individual_status"
                    "(timestamp, element, output, status) values (?, ?, ?, ?)",
                    (timestamp, key, value, element_status))
            status = FAILURE if fail else SUCCESS
            db.execute("insert into status(timestamp, status) values (?, ?)",
                       (timestamp, status))
    finally:
        db.close()
    monitors['datetime'] = date
    with open(monitoring_file_json, "w") as json_file:
        json_file.write(json.dumps(monitors))
165
def main():
    """Parse the command line, run the requested checks and report them.

    At least one of -a, -m, -p, -s is required.  With -n the results are
    printed as JSON on stdout; otherwise they are handed to writeFiles().
    """
    parser = OptionParser(option_list=option_list)
    options, _args = parser.parse_args()

    if not (options.monitor or options.promise
            or options.service or options.all):
        sys.exit("Please provide at least one arg in : -a, -m, -p, -s")

    monitors = {}
    if options.monitor or options.all:
        monitors.update(runScripts(monitor_dir))
    if options.promise or options.all:
        monitors.update(runScripts(promise_dir))
    if options.service or options.all:
        monitors.update(runServices(pid_dir))

    if options.only_stdout:
        # Parenthesised single argument: valid on both Python 2 and 3.
        print(json.dumps(monitors))
    else:
        writeFiles(monitors)

    # NOTE(review): the exit status is 1 as soon as ``monitors`` is not
    # empty, whatever the individual results are, and writeFiles() adds a
    # 'datetime' key so it is always 1 in write mode -- confirm this is the
    # intended contract before relying on the status.
    if not monitors:
        sys.exit(0)
    else:
        sys.exit(1)


if __name__ == "__main__":
    main()