server.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import print_function, unicode_literals
  4. from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
  5. import cgi
  6. import json
  7. import logging
  8. import re
  9. import socket
  10. from SocketServer import ThreadingMixIn
  11. import time
  12. import ffstatus
  13. class BatcaveHttpRequestHandler(BaseHTTPRequestHandler):
  14. def __init__(self, request, client_address, server):
  15. self.logger = logging.getLogger('API')
  16. BaseHTTPRequestHandler.__init__(self, request, client_address, server)
  17. def parse_url_pathquery(self):
  18. """Extracts the query parameters from the request path."""
  19. url = re.match(r'^/(?P<path>.*?)(\?(?P<query>.+))?$', self.path.strip())
  20. if url is None:
  21. logging.warn('Failed to parse URL \'' + str(self.path) + '\'.')
  22. return ( None, None )
  23. path = url.group('path')
  24. query = {}
  25. if not url.group('query') is None:
  26. for m in re.finditer(r'(?P<key>.+?)=(?P<value>.+?)(&|$)', url.group('query')):
  27. query[m.group('key')] = m.group('value')
  28. return ( path, query )
  29. def do_GET(self):
  30. """Handles all HTTP GET requests."""
  31. path, query = self.parse_url_pathquery()
  32. if path is None:
  33. self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
  34. return
  35. # / - index page, shows generic help
  36. if path == '':
  37. self.respond_index(query)
  38. return
  39. # /list - list stored nodes
  40. if path == 'list':
  41. self.respond_list(query)
  42. return
  43. # /vpn - notification endpoint for gateway's VPN connections
  44. if path == 'vpn':
  45. self.respond_vpn(query)
  46. return
  47. # /providers
  48. if path == 'providers':
  49. self.respond_providers(query)
  50. return
  51. # /node/<id>.json - node's data
  52. # /node/<id>/field - return specific field from node's data
  53. m = re.match(r'node/(?P<id>[a-fA-F0-9]{12})(?P<cmd>\.json|/[a-zA-Z0-9_\-\.]+)$', path)
  54. if m != None:
  55. cmd = m.group('cmd')
  56. nodeid = m.group('id').lower()
  57. if cmd == '.json':
  58. self.respond_node(nodeid)
  59. else:
  60. self.respond_nodedetail(nodeid, cmd[1:])
  61. return
  62. # /status/<id> - node's status
  63. m = re.match(r'status/([a-f0-9]{12})$', path)
  64. if m != None:
  65. self.respond_nodestatus(m.group(1))
  66. return
  67. # no match -> 404
  68. self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))
  69. def do_POST(self):
  70. """Handles all HTTP POST requests."""
  71. path, query = self.parse_url_pathquery()
  72. if path is None:
  73. self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
  74. return
  75. params = self.parse_post_params()
  76. # node id/mac to name mapping
  77. if path == 'idmac2name':
  78. self.respond_nodeidmac2name(params)
  79. return
  80. # no match -> 404
  81. self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))
  82. def send_nocache_headers(self):
  83. """Sets HTTP headers indicating that this response shall not be cached."""
  84. self.send_header('Cache-Control', 'no-cache, no-store, must-revalidate')
  85. self.send_header('Pragma', 'no-cache')
  86. self.send_header('Expires', '0')
  87. def send_headers(self, content_type='text/html; charset=utf-8', nocache=True):
  88. """Send HTTP 200 Response header with the given Content-Type.
  89. Optionally send no-caching headers, too."""
  90. self.send_response(200)
  91. self.send_header('Content-Type', content_type)
  92. if nocache:
  93. self.send_nocache_headers()
  94. self.end_headers()
  95. def parse_post_params(self):
  96. ctype, pdict = cgi.parse_header(self.headers.getheader('content-type'))
  97. if ctype == 'multipart/form-data':
  98. postvars = cgi.parse_multipart(self.rfile, pdict)
  99. elif ctype == 'application/x-www-form-urlencoded':
  100. length = int(self.headers.getheader('content-length'))
  101. postvars = cgi.parse_qs(self.rfile.read(length), keep_blank_values=1)
  102. else:
  103. postvars = {}
  104. return postvars
  105. def respond_index(self, query):
  106. """Display the index page."""
  107. storage = self.server.storage
  108. self.send_headers()
  109. self.wfile.write('<!DOCTYPE html><html><head><title>BATCAVE</title></head>\n')
  110. self.wfile.write('<body>\n')
  111. self.wfile.write('<H1 title="Batman/Alfred Transmission Collection, Aggregation & Value Engine">BATCAVE</H1>\n')
  112. self.wfile.write('<p>Dies ist ein interner Hintergrund-Dienst. Er wird nur von anderen Diensten\n')
  113. self.wfile.write('angesprochen und sollte aus einer Mehrzahl von Gr&uuml;nden nicht &ouml;ffentlich\n')
  114. self.wfile.write('zug&auml;nglich sein.</p>\n')
  115. self.wfile.write('<H2>Status</H2>\n')
  116. self.wfile.write('Daten: <span id="datacount" class="value">')
  117. self.wfile.write(len(storage.data))
  118. self.wfile.write('</span>\n')
  119. self.wfile.write('<H2>API</H2>\n')
  120. self.wfile.write('<p>Grundsätzlich ist das Antwort-Format JSON und alle Daten sind Live-Daten (kein Cache) die ggf. etwas Bearbeitungs-Zeit erfordern.</p>')
  121. self.wfile.write('<dl>\n')
  122. self.wfile.write('<dt><a href="/nodes.json">nodes.json</a></dt><dd>zur Verwendung mit ffmap (MACs anonymisiert)</dd>\n')
  123. self.wfile.write('<dt><a href="/node/ff00ff00ff00.json">/node/&lt;id&gt;.json</a></dt><dd><u>alle</u> vorhandenen Information zu der gewünschten Node</dd>\n')
  124. self.wfile.write('</dl>\n')
  125. self.wfile.write('</body></html>')
  126. def respond_list(self, query):
  127. """List stored data."""
  128. self.send_headers()
  129. self.wfile.write('<!DOCTYPE html><html>\n')
  130. self.wfile.write('<head><title>BATCAVE</title></head>\n')
  131. self.wfile.write('<body>\n')
  132. self.wfile.write('<H1>BATCAVE - LIST</H1>\n')
  133. self.wfile.write('<table>\n')
  134. self.wfile.write('<thead><tr><th>ID</th><th>Name</th></tr></thead>\n')
  135. self.wfile.write('<tbody>\n')
  136. sortkey = query['sort'] if 'sort' in query else None
  137. data = self.server.storage.get_nodes(sortby=sortkey)
  138. for node in data:
  139. nodeid = node['node_id']
  140. nodename = node['hostname'] if 'hostname' in node else '&lt;?&gt;'
  141. self.wfile.write('<tr><td><a href="/node/' + nodeid + '.json">' + nodeid + '</a></td><td>' + nodename + '</td></tr>')
  142. self.wfile.write('</tbody>\n')
  143. self.wfile.write('</table>\n')
  144. def respond_node(self, rawid):
  145. """Display node data."""
  146. # handle API example linked on index page
  147. if rawid == 'ff00ff00ff00':
  148. self.send_headers('text/json')
  149. self.wfile.write(json.dumps({
  150. 'name': 'API-Example',
  151. 'nodeid': rawid,
  152. 'META': 'Dies ist ein minimaler Beispiel-Datensatz. Herzlichen Glückwunsch, du hast das Prinzip der API kapiert.',
  153. }))
  154. return
  155. # search node by the given id
  156. node = self.server.storage.find_node(rawid)
  157. # handle unknown nodes
  158. if node is None:
  159. self.send_error(404, 'No node with id \'' + rawid + '\' present.')
  160. return
  161. # dump node data as JSON
  162. self.send_headers('text/json')
  163. self.wfile.write(json.dumps(node))
  164. def respond_nodestatus(self, rawid):
  165. """Display node status."""
  166. status = self.server.storage.get_nodestatus(rawid)
  167. if status is None:
  168. self.send_error(404, 'No node with id \'' + rawid + '\' present.')
  169. self.send_headers('text/plain')
  170. self.wfile.write(status)
  171. def respond_nodeidmac2name(self, ids):
  172. """Return a mapping of the given IDs (or MACs) into their hostname."""
  173. self.send_headers('text/plain')
  174. for nodeid in ids:
  175. node = self.server.storage.find_node(nodeid) if not ':' in nodeid else self.server.storage.find_node_by_mac(nodeid)
  176. nodename = node['hostname'] if (not node is None) and 'hostname' in node else nodeid
  177. self.wfile.write('{0}={1}\n'.format(nodeid, nodename))
  178. def respond_nodedetail(self, nodeid, field):
  179. """Return a field from the given node - a string is returned as text, all other as JSON."""
  180. node = self.server.storage.find_node(nodeid)
  181. if node is None:
  182. self.send_error(404, 'No node with id \'' + nodeid + '\' present.')
  183. return
  184. return_count = False
  185. if field.endswith('.count'):
  186. return_count = True
  187. field = field[0:-6]
  188. if not field in node:
  189. self.send_error(404, 'The node \'' + nodeid + '\' does not have a field named \'' + str(field) + '\'.')
  190. return
  191. value = node[field]
  192. if return_count:
  193. value = len(value)
  194. self.send_headers('text/plain' if isinstance(value, basestring) or isinstance(value, int) else 'text/json')
  195. self.wfile.write(value if isinstance(value, basestring) else json.dumps(value))
  196. def respond_vpn(self, query):
  197. storage = self.server.storage
  198. peername = query['peer'] if 'peer' in query else None
  199. key = query['key'] if 'key' in query else None
  200. action = query['action'] if 'action' in query else None
  201. remote = query['remote'] if 'remote' in query else None
  202. gw = query['gw'] if 'gw' in query else None
  203. ts = query['ts'] if 'ts' in query else time.time()
  204. if action == 'list':
  205. self.respond_vpnlist()
  206. return
  207. if action != 'establish' and action != 'disestablish':
  208. self.logger.error('VPN: unknown action \'{0}\''.format(action))
  209. self.send_error(400, 'Invalid action.')
  210. return
  211. check = {'peername': peername, 'key': key, 'remote': remote, 'gw': gw}
  212. for k, val in check.items():
  213. if val is None or len(val.strip()) == 0:
  214. self.logger.error('VPN {0}: no or empty {1}'.format(action, k))
  215. self.send_error(400, 'Missing value for ' + str(k))
  216. return
  217. try:
  218. if action == 'establish':
  219. self.server.storage.log_vpn_connect(
  220. key, peername, remote, gw, ts)
  221. elif action == 'disestablish':
  222. self.server.storage.log_vpn_connect(key, gw, ts)
  223. else:
  224. self.logger.error('Unknown VPN action \'%s\' not filtered.',
  225. action)
  226. self.send_error(500)
  227. return
  228. except ffstatus.exceptions.VpnKeyFormatError:
  229. self.logger.error('VPN peer \'{0}\' {1}: bad key \'{2}\''.format(
  230. peername, action, key,
  231. ))
  232. self.send_error(400, 'Bad key.')
  233. return
  234. self.send_headers('text/plain')
  235. self.wfile.write('OK')
  236. storage.save()
  237. def respond_vpnlist(self):
  238. self.send_headers()
  239. self.wfile.write('<!DOCTYPE html>\n')
  240. self.wfile.write('<html><head><title>BATCAVE - VPN LIST</title></head>\n')
  241. self.wfile.write('<body>\n')
  242. self.wfile.write('<style type="text/css">\n')
  243. self.wfile.write('table { border: 2px solid #999; border-collapse: collapse; }\n')
  244. self.wfile.write('th, td { border: 1px solid #CCC; }\n')
  245. self.wfile.write('table tbody tr.online { background-color: #CFC; }\n')
  246. self.wfile.write('table tbody tr.offline { background-color: #FCC; }\n')
  247. self.wfile.write('</style>\n')
  248. self.wfile.write('<table>\n<thead>\n')
  249. gateways = self.server.storage.get_vpn_gateways()
  250. self.wfile.write('<tr><th rowspan="2">names (key)</th><th colspan="' + str(len(gateways)) + '">active</th><th colspan="' + str(len(gateways)) + '">last</th></tr>\n')
  251. self.wfile.write('<tr><th>' + '</th><th>'.join(gateways) + '</th><th>' + '</th><th>'.join(gateways) + '</th></tr>\n')
  252. self.wfile.write('</thead>\n')
  253. for item in self.server.storage.get_vpn_connections():
  254. self.wfile.write('<tr class="{0}">'.format('online' if item['online'] else 'offline'))
  255. self.wfile.write('<td title="{0}">{1}</td>'.format(
  256. item['key'],
  257. ' / '.join(item['names']) if len(item['names']) > 0 else '?',
  258. ))
  259. for conntype in ['active', 'last']:
  260. for gateway in gateways:
  261. remote = ''
  262. if conntype in item['remote'] and gateway in item['remote'][conntype]:
  263. remote = item['remote'][conntype][gateway]
  264. if isinstance(remote, dict):
  265. remote = remote['name']
  266. symbol = '&check;' if len(remote) > 0 else '&times;'
  267. self.wfile.write('<td title="{0}">{1}</td>'.format(
  268. remote, symbol))
  269. self.wfile.write('</tr>\n')
  270. self.wfile.write('</table>\n')
  271. self.wfile.write('</body>')
  272. self.wfile.write('</html>')
  273. def respond_providers(self, query):
  274. """Return a summary of providers."""
  275. outputformat = query['format'].lower() if 'format' in query else 'html'
  276. isps = {}
  277. ispblocks = {}
  278. for item in self.server.storage.get_vpn_connections():
  279. if item['count']['active'] == 0:
  280. continue
  281. remotes = []
  282. for gateway in item['remote']['active']:
  283. remote = item['remote']['active'][gateway]
  284. remotes.append(remote)
  285. if len(remotes) == 0:
  286. self.logger.warn(
  287. 'VPN key \'%s\' is marked with active remotes but 0 found?',
  288. item['key'])
  289. continue
  290. item_isps = set()
  291. for remote in remotes:
  292. isp = "UNKNOWN"
  293. ispblock = remote
  294. if isinstance(remote, dict):
  295. ispblock = remote['name']
  296. desc_lines = remote['description'].split('\n')
  297. isp = desc_lines[0].strip()
  298. # normalize name: strip company indication
  299. isp = re.sub(r'(AG|UG|G?mbH( ?& ?Co\.? ?(OH|K)G)?)$', '', isp, flags=re.IGNORECASE).strip()
  300. # normalize name: strip "pool" suffixes
  301. isp = re.sub(r'(dynamic )?(customer |subscriber )?(ip )?(pool|(address )?range|addresses)$', '', isp, flags=re.IGNORECASE).strip()
  302. # normalize name: strip "B2B" and aggregation suffixes
  303. isp = re.sub(r'(aggregate|aggregation)?$', '', isp, flags=re.IGNORECASE).strip()
  304. isp = re.sub(r'(B2B)?$', '', isp, flags=re.IGNORECASE).strip()
  305. # normalize name: strip country suffixes (in Germany)
  306. isp = re.sub(r'(DE|Deutschland|Germany|Nordrhein[- ]Westfalen|NRW|Baden[- ]Wuerttemburg|BW|Hessen|Niedersachsen|Rheinland[- ]Pfalz|RLP)$', '', isp, flags=re.IGNORECASE).strip()
  307. isp = str(isp)
  308. if not isp in ispblocks:
  309. ispblocks[isp] = set()
  310. ispblocks[isp].add(ispblock)
  311. item_isps.add(isp)
  312. if len(item_isps) == 0:
  313. item_isps.add('unknown')
  314. elif len(item_isps) > 1:
  315. self.logger.warn('VPN key \'{0}\' has {1} active IPs which resolved to {2} ISPs: \'{3}\''.format(key, len(ips), len(item_isps), '\', \''.join(item_isps)))
  316. for isp in item_isps:
  317. if not isp in isps:
  318. isps[isp] = 0
  319. isps[isp] += 1.0 / len(item_isps)
  320. isps_sum = sum([isps[x] for x in isps])
  321. if outputformat == 'csv':
  322. self.send_headers('text/csv')
  323. self.wfile.write('Count;Name\n')
  324. for isp in isps:
  325. self.wfile.write('{0};"{1}"\n'.format(isps[isp], isp))
  326. elif outputformat == 'json':
  327. self.send_headers('text/json')
  328. data = [
  329. {
  330. 'name': isp,
  331. 'count': isps[isp],
  332. 'percentage': isps[isp]*100.0/isps_sum,
  333. 'blocks': [block for block in ispblocks[isp]],
  334. } for isp in isps
  335. ]
  336. self.wfile.write(json.dumps(data))
  337. elif outputformat == 'html':
  338. self.send_headers()
  339. self.wfile.write('<!DOCTYPE html><html>\n')
  340. self.wfile.write('<head><title>BATCAVE - PROVIDERS</title></head>\n')
  341. self.wfile.write('<body>\n')
  342. self.wfile.write('<table border="2">\n')
  343. self.wfile.write('<thead><tr><th>Count</th><th>Percentage</th><th>Name</th><th>Blocks</th></tr></thead>\n')
  344. self.wfile.write('<tbody>\n')
  345. for isp in sorted(isps, key=lambda x: isps[x], reverse=True):
  346. self.wfile.write('<tr><td>{0}</td><td>{1:.1f}%</td><td>{2}</td><td>{3}</td></tr>\n'.format(
  347. isps[isp],
  348. isps[isp]*100.0/isps_sum,
  349. isp,
  350. ', '.join(sorted(ispblocks[isp])) if isp in ispblocks else '?',
  351. ))
  352. self.wfile.write('</tbody></table>\n')
  353. self.wfile.write('<p>Totals: {0} ISPs, {1} connections</p>\n'.format(len(isps), isps_sum))
  354. self.wfile.write('</body></html>')
  355. else:
  356. self.send_error(400, 'Unknown output format.')
  357. class ApiServer(ThreadingMixIn, HTTPServer):
  358. def __init__(self, endpoint, storage):
  359. if ':' in endpoint[0]:
  360. self.address_family = socket.AF_INET6
  361. HTTPServer.__init__(self, endpoint, BatcaveHttpRequestHandler)
  362. self.storage = storage
  363. def __str__(self):
  364. return 'ApiServer on {0}'.format(self.server_address)
  365. if __name__ == '__main__':
  366. dummystorage = ffstatus.basestorage.BaseStorage()
  367. server = ApiServer(('0.0.0.0', 8888), dummystorage)
  368. print("Server:", str(server))
  369. server.serve_forever()