server.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import print_function, unicode_literals
  4. from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
  5. import cgi
  6. import difflib
  7. import json
  8. import logging
  9. import re
  10. import socket
  11. from SocketServer import ThreadingMixIn
  12. import time
  13. import ffstatus
  14. # each match of these regex is removed to normalize an ISP's name
  15. ISP_NORMALIZATIONS = [
  16. # normalize name: strip company indication
  17. re.compile(r'(AG|UG|G?mbH( ?& ?Co\.? ?(OH|K)G)?)$', flags=re.IGNORECASE),
  18. # normalize name: strip "pool" suffixes
  19. re.compile(r'(dynamic )?(customer |subscriber )?(ip )?(pool|(address )?range|addresses)$', flags=re.IGNORECASE),
  20. # normalize name: strip "B2B" and aggregation suffixes
  21. re.compile(r'(aggregate|aggregation)?$', flags=re.IGNORECASE),
  22. re.compile(r'(B2B)?$', flags=re.IGNORECASE),
  23. # normalize name: strip country suffixes (in Germany)
  24. re.compile(r'(' +
  25. 'DE|Deutschland|Germany|' +
  26. 'Nordrhein[- ]Westfalen|NRW|' +
  27. 'Baden[- ]Wuerttemburg|BW|' +
  28. 'Hessen|' +
  29. 'Niedersachsen|' +
  30. 'Rheinland[- ]Pfalz|RLP' +
  31. ')$',
  32. flags=re.IGNORECASE),
  33. ]
  34. REGEX_QUERYPARAM = re.compile(
  35. r'(?P<key>.+?)=(?P<value>.+?)(&|$)')
  36. REGEX_URL_NODEINFO = re.compile(
  37. r'node/(?P<id>[a-fA-F0-9]{12})(?P<cmd>\.json|/[a-zA-Z0-9_\-\.]+)$')
  38. REGEX_URL_NODESTATUS = re.compile(
  39. r'status/([a-f0-9]{12})$')
  40. def normalize_ispname(isp):
  41. """Removes all matches on ISP_NORMALIZATIONS."""
  42. isp = isp.strip()
  43. for regex in ISP_NORMALIZATIONS:
  44. isp = regex.sub('', isp).strip()
  45. return isp
  46. class BatcaveHttpRequestHandler(BaseHTTPRequestHandler):
  47. """Handles a single HTTP request to the BATCAVE."""
  48. def __init__(self, request, client_address, sockserver):
  49. self.logger = logging.getLogger('API')
  50. BaseHTTPRequestHandler.__init__(
  51. self, request, client_address, sockserver)
  52. def __parse_url_pathquery(self):
  53. """Extracts the query parameters from the request path."""
  54. url = re.match(r'^/(?P<path>.*?)(\?(?P<query>.+))?$', self.path.strip())
  55. if url is None:
  56. logging.warn('Failed to parse URL \'' + str(self.path) + '\'.')
  57. return (None, None)
  58. path = url.group('path')
  59. query = {}
  60. if not url.group('query') is None:
  61. for match in REGEX_QUERYPARAM.finditer(url.group('query')):
  62. query[match.group('key')] = match.group('value')
  63. return (path, query)
  64. def do_GET(self):
  65. """Handles all HTTP GET requests."""
  66. path, query = self.__parse_url_pathquery()
  67. if path is None:
  68. self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
  69. return
  70. # / - index page, shows generic help
  71. if path == '':
  72. self.__respond_index(query)
  73. return
  74. # /nodes.json
  75. if path == 'nodes.json':
  76. self.__respond_nodes(query)
  77. return
  78. # /list - list stored nodes
  79. if path == 'list':
  80. self.__respond_list(query)
  81. return
  82. # /vpn - notification endpoint for gateway's VPN connections
  83. if path == 'vpn':
  84. self.__respond_vpn(query)
  85. return
  86. # /providers
  87. if path == 'providers':
  88. self.__respond_providers(query)
  89. return
  90. # /find?name=foo&fuzzy=1
  91. if path == 'find':
  92. self.__respond_findnode(query)
  93. return
  94. # /node/<id>.json - node's data
  95. # /node/<id>/field - return specific field from node's data
  96. match = REGEX_URL_NODEINFO.match(path)
  97. if match is not None:
  98. cmd = match.group('cmd')
  99. nodeid = match.group('id').lower()
  100. if cmd == '.json':
  101. self.__respond_node(nodeid)
  102. else:
  103. self.__respond_nodedetail(nodeid, cmd[1:])
  104. return
  105. # /status - overall status (incl. node and client count)
  106. if path == 'status':
  107. self.__respond_status()
  108. return
  109. # /status/<id> - node's status
  110. match = REGEX_URL_NODESTATUS.match(path)
  111. if match is not None:
  112. self.__respond_nodestatus(match.group(1))
  113. return
  114. # no match -> 404
  115. self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))
  116. def do_POST(self):
  117. """Handles all HTTP POST requests."""
  118. path, query = self.__parse_url_pathquery()
  119. if path is None:
  120. self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
  121. return
  122. params = self.__parse_post_params()
  123. # node id/mac to name mapping
  124. if path == 'idmac2name':
  125. self.__respond_nodeidmac2name(params)
  126. return
  127. # no match -> 404
  128. self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))
  129. def __send_nocache_headers(self):
  130. """
  131. Sets HTTP headers indicating that this response shall not be cached.
  132. """
  133. self.send_header('Cache-Control', 'no-cache, no-store, must-revalidate')
  134. self.send_header('Pragma', 'no-cache')
  135. self.send_header('Expires', '0')
  136. def __send_headers(self,
  137. content_type='text/html; charset=utf-8',
  138. nocache=True, extra={}):
  139. """Send HTTP 200 Response header with the given Content-Type.
  140. Optionally send no-caching headers, too."""
  141. self.send_response(200)
  142. self.send_header('Content-Type', content_type)
  143. if nocache:
  144. self.__send_nocache_headers()
  145. for key in extra:
  146. self.send_header(key, extra[key])
  147. self.end_headers()
  148. def __parse_post_params(self):
  149. ctype, pdict = cgi.parse_header(self.headers.getheader('content-type'))
  150. if ctype == 'multipart/form-data':
  151. postvars = cgi.parse_multipart(self.rfile, pdict)
  152. elif ctype == 'application/x-www-form-urlencoded':
  153. length = int(self.headers.getheader('content-length'))
  154. postvars = cgi.parse_qs(
  155. self.rfile.read(length),
  156. keep_blank_values=1,
  157. )
  158. else:
  159. postvars = {}
  160. return postvars
  161. def __respond_index(self, query):
  162. """Display the index page."""
  163. self.__send_headers()
  164. index_page = '''<!DOCTYPE html>
  165. <html><head><title>BATCAVE</title></head>
  166. <body>
  167. <H1 title="Batman/Alfred Transmission Collection, Aggregation & Value Engine">
  168. BATCAVE
  169. </H1>
  170. <p>Dies ist ein interner Hintergrund-Dienst. Er wird nur von anderen Diensten
  171. angesprochen und sollte aus einer Mehrzahl von Gr&uuml;nden nicht
  172. &ouml;ffentlich zug&auml;nglich sein.</p>
  173. <H2>API</H2>
  174. <p>
  175. Grunds&auml;tzlich ist das Antwort-Format JSON und alle Daten sind
  176. Live-Daten (kein Cache) die ggf. etwas Bearbeitungs-Zeit erfordern.
  177. </p>
  178. <dl>
  179. <dt>GET <a href="/nodes.json">nodes.json</a></dt>
  180. <dd>zur Verwendung mit ffmap (MACs anonymisiert)</dd>
  181. <dt>GET /node/&lt;id&gt;.json</dt>
  182. <dd>alle Daten zu dem gew&uuml;nschten Knoten</dd>
  183. <dt>GET /providers?format=json</dt>
  184. <dd>Liste der Provider</dd>
  185. <dt>GET <a href="/status">/status</a></dt>
  186. <dd>Status der BATCAVE inkl. Zahl der Nodes+Clients (JSON)</dd>
  187. <dt>GET /status/&lt;id&gt;</dt>
  188. <dd>Status des Knotens</dd>
  189. </dl>
  190. </body></html>'''
  191. self.wfile.write(index_page)
  192. def __respond_list(self, query):
  193. """List stored data."""
  194. self.__send_headers()
  195. self.wfile.write('<!DOCTYPE html><html>\n')
  196. self.wfile.write('<head><title>BATCAVE</title></head>\n')
  197. self.wfile.write('<body>\n')
  198. self.wfile.write('<H1>BATCAVE - LIST</H1>\n')
  199. self.wfile.write('<table>\n')
  200. self.wfile.write('<thead><tr><th>ID</th><th>Name</th></tr></thead>\n')
  201. self.wfile.write('<tbody>\n')
  202. sortkey = query['sort'] if 'sort' in query else None
  203. data = self.server.storage.get_nodes(sortby=sortkey)
  204. for node in data:
  205. nodeid = node.get('node_id')
  206. nodename = node.get('hostname', '&lt;?&gt;')
  207. self.wfile.write('<tr>\n')
  208. self.wfile.write(' <td><a href="/node/{0}.json">{0}</a></td>\n'.format(nodeid))
  209. self.wfile.write(' <td>{0}</td>\n'.format(nodename))
  210. self.wfile.write('</tr>\n')
  211. self.wfile.write('</tbody>\n')
  212. self.wfile.write('</table>\n')
  213. def __map_item(self, haystack, needle, prefix=None):
  214. if not isinstance(haystack, dict):
  215. raise Exception("haystack must be a dict")
  216. if needle in haystack:
  217. return haystack[needle]
  218. idx = len(haystack) + 1
  219. name = prefix + str(idx)
  220. while name in haystack:
  221. idx += 1
  222. name = prefix + str(idx)
  223. haystack[needle] = name
  224. return name
  225. def __respond_nodes(self, query):
  226. indent = 2 if query.get('pretty', 0) == '1' else None
  227. nodes = []
  228. clientmapping = {}
  229. for node in self.server.storage.get_nodes():
  230. sw = node.get('software', {})
  231. entry = {
  232. 'id': node.get('node_id'),
  233. 'name': node.get('hostname'),
  234. 'clients': [self.__map_item(clientmapping, x, "c")
  235. for x in node.get('clients', [])],
  236. 'autoupdater': sw.get('autoupdater', 'unknown'),
  237. 'firmware': sw.get('firmware'),
  238. }
  239. geo = node.get('location', None)
  240. if geo is not None:
  241. entry['geo'] = [geo['latitude'], geo['longitude']]
  242. nodes.append(entry)
  243. result = {'nodes': nodes}
  244. self.__send_headers(content_type='application/json',
  245. extra={'Content-Disposition': 'inline'})
  246. self.wfile.write(json.dumps(result, indent=indent))
  247. def __respond_node(self, rawid):
  248. """Display node data."""
  249. # search node by the given id
  250. node = self.server.storage.find_node(rawid)
  251. # handle unknown nodes
  252. if node is None:
  253. self.send_error(404, 'No node with id \'' + rawid + '\' present.')
  254. return
  255. # dump node data as JSON
  256. self.__send_headers('application/json',
  257. extra={'Content-Disposition': 'inline'})
  258. self.wfile.write(json.dumps(node))
  259. def __respond_nodestatus(self, rawid):
  260. """Display node status."""
  261. status = self.server.storage.get_nodestatus(rawid)
  262. if status is None:
  263. self.send_error(404, 'No node with id \'' + rawid + '\' present.')
  264. self.__send_headers('text/plain')
  265. self.wfile.write(status)
  266. def __respond_findnode(self, query):
  267. """Find nodes matching the supplied name."""
  268. self.__send_headers('application/json',
  269. extra={'Content-Disposition': 'inline'})
  270. name = query.get('name').lower()
  271. fuzzy = query.get('fuzzy', '0') == '1'
  272. names = {}
  273. for node in self.server.storage.get_nodes():
  274. nodename = node.get('hostname').lower()
  275. if nodename not in names:
  276. # first time we see this name
  277. names[nodename] = [node]
  278. else:
  279. # we've seen this name before
  280. names[nodename].append(node)
  281. allnames = [x for x in names]
  282. resultnames = []
  283. # check for exact match
  284. if name in allnames:
  285. # write the exact matches and we're done
  286. resultnames = [name]
  287. else:
  288. # are we allowed to fuzzy match?
  289. if not fuzzy:
  290. # no -> return zero matches
  291. self.wfile.write('[]')
  292. return
  293. # let's do fuzzy matching
  294. resultnames = difflib.get_close_matches(name, allnames,
  295. cutoff=0.75)
  296. result = []
  297. for possibility in resultnames:
  298. for x in names[possibility]:
  299. x_id = x.get('node_id')
  300. result.append({
  301. 'id': x_id,
  302. 'name': x.get('hostname'),
  303. 'status': self.server.storage.get_nodestatus(x_id),
  304. })
  305. self.wfile.write(json.dumps(result))
  306. def __respond_nodeidmac2name(self, ids):
  307. """Return a mapping of the given IDs (or MACs) into their hostname."""
  308. self.__send_headers('text/plain')
  309. for nodeid in ids:
  310. node = None
  311. if not ':' in nodeid:
  312. node = self.server.storage.find_node(nodeid)
  313. else:
  314. node = self.server.storage.find_node_by_mac(nodeid)
  315. nodename = node.get('hostname', nodeid) if node is not None else nodeid
  316. self.wfile.write('{0}={1}\n'.format(nodeid, nodename))
  317. def __respond_nodedetail(self, nodeid, field):
  318. """
  319. Return a field from the given node.
  320. String and integers are returned as text/plain,
  321. all other as JSON.
  322. """
  323. node = self.server.storage.find_node(nodeid, include_raw_data=True)
  324. if node is None:
  325. self.send_error(404, 'No node with id \'' + nodeid + '\' present.')
  326. return
  327. return_count = False
  328. if field.endswith('.count'):
  329. return_count = True
  330. field = field[0:-6]
  331. if not field in node:
  332. self.send_error(
  333. 404,
  334. 'The node \'{0}\' does not have a field named \'{1}\'.'.format(
  335. nodeid, field
  336. )
  337. )
  338. return
  339. value = node[field]
  340. if return_count:
  341. value = len(value)
  342. no_json = isinstance(value, basestring) or isinstance(value, int)
  343. self.__send_headers('text/plain' if no_json else 'application/json',
  344. extra={'Content-Disposition': 'inline'})
  345. self.wfile.write(value if no_json else json.dumps(value))
  346. def __respond_status(self):
  347. status = self.server.storage.status
  348. self.__send_headers('application/json',
  349. extra={'Content-Disposition': 'inline'})
  350. self.wfile.write(json.dumps(status, indent=2))
  351. def __respond_vpn(self, query):
  352. storage = self.server.storage
  353. peername = query.get('peer')
  354. key = query.get('key')
  355. action = query.get('action')
  356. remote = query.get('remote')
  357. gateway = query.get('gw')
  358. timestamp = query.get('ts', time.time())
  359. if action == 'list':
  360. self.__respond_vpnlist()
  361. return
  362. if action != 'establish' and action != 'disestablish':
  363. self.logger.error('VPN: unknown action \'{0}\''.format(action))
  364. self.send_error(400, 'Invalid action.')
  365. return
  366. check = {
  367. 'peername': peername,
  368. 'key': key,
  369. 'remote': remote,
  370. 'gw': gateway,
  371. }
  372. for k, val in check.items():
  373. if val is None or len(val.strip()) == 0:
  374. self.logger.error('VPN {0}: no or empty {1}'.format(action, k))
  375. self.send_error(400, 'Missing value for ' + str(k))
  376. return
  377. try:
  378. if action == 'establish':
  379. self.server.storage.log_vpn_connect(
  380. key, peername, remote, gateway, timestamp)
  381. elif action == 'disestablish':
  382. self.server.storage.log_vpn_disconnect(key, gateway, timestamp)
  383. else:
  384. self.logger.error('Unknown VPN action \'%s\' not filtered.',
  385. action)
  386. self.send_error(500)
  387. return
  388. except ffstatus.exceptions.VpnKeyFormatError:
  389. self.logger.error('VPN peer \'{0}\' {1}: bad key \'{2}\''.format(
  390. peername, action, key,
  391. ))
  392. self.send_error(400, 'Bad key.')
  393. return
  394. self.__send_headers('text/plain')
  395. self.wfile.write('OK')
  396. storage.save()
  397. def __respond_vpnlist(self):
  398. self.__send_headers()
  399. self.wfile.write('''<!DOCTYPE html>
  400. <html><head><title>BATCAVE - VPN LIST</title></head>
  401. <body>
  402. <style type="text/css">
  403. table { border: 2px solid #999; border-collapse: collapse; }
  404. th, td { border: 1px solid #CCC; }
  405. table tbody tr.online { background-color: #CFC; }
  406. table tbody tr.offline { background-color: #FCC; }
  407. </style>
  408. <table>''')
  409. gateways = self.server.storage.get_vpn_gateways()
  410. gws_header = '<th>' + '</th><th>'.join(gateways) + '</th>'
  411. self.wfile.write('<thead>\n')
  412. self.wfile.write('<tr><th rowspan="2">names (key)</th>')
  413. self.wfile.write('<th colspan="' + str(len(gateways)) + '">active</th>')
  414. self.wfile.write('<th colspan="' + str(len(gateways)) + '">last</th>')
  415. self.wfile.write('</tr>\n')
  416. self.wfile.write('<tr>' + gws_header + gws_header + '</tr>\n')
  417. self.wfile.write('</thead>\n')
  418. for item in self.server.storage.get_vpn_connections():
  419. row_class = 'online' if item['online'] else 'offline'
  420. self.wfile.write('<tr class="{0}">'.format(row_class))
  421. self.wfile.write('<td title="{0}">{1}</td>'.format(
  422. item['key'],
  423. ' / '.join(item['names']) if len(item['names']) > 0 else '?',
  424. ))
  425. for conntype in ['active', 'last']:
  426. for gateway in gateways:
  427. remote = ''
  428. if conntype in item['remote'] and \
  429. gateway in item['remote'][conntype]:
  430. remote = item['remote'][conntype][gateway]
  431. if isinstance(remote, dict):
  432. remote = remote['name']
  433. symbol = '&check;' if len(remote) > 0 else '&times;'
  434. self.wfile.write('<td title="{0}">{1}</td>'.format(
  435. remote, symbol))
  436. self.wfile.write('</tr>\n')
  437. self.wfile.write('</table>\n')
  438. self.wfile.write('</body>')
  439. self.wfile.write('</html>')
  440. def __respond_providers(self, query):
  441. """Return a summary of providers."""
  442. outputformat = query['format'].lower() if 'format' in query else 'html'
  443. isps = {}
  444. ispblocks = {}
  445. for item in self.server.storage.get_vpn_connections():
  446. if item['count']['active'] == 0:
  447. continue
  448. remotes = []
  449. for gateway in item['remote']['active']:
  450. remote = item['remote']['active'][gateway]
  451. remotes.append(remote)
  452. if len(remotes) == 0:
  453. self.logger.warn(
  454. 'VPN key \'%s\' is marked with active remotes but 0 found?',
  455. item['key'])
  456. continue
  457. item_isps = set()
  458. for remote in remotes:
  459. isp = "UNKNOWN"
  460. ispblock = remote
  461. if isinstance(remote, dict):
  462. ispblock = remote['name']
  463. desc_lines = remote['description'].split('\n')
  464. isp = normalize_ispname(desc_lines[0])
  465. if not isp in ispblocks:
  466. ispblocks[isp] = set()
  467. ispblocks[isp].add(ispblock)
  468. item_isps.add(isp)
  469. if len(item_isps) == 0:
  470. item_isps.add('unknown')
  471. elif len(item_isps) > 1:
  472. self.logger.warn(
  473. 'VPN key \'%s\' has %d active IPs ' +
  474. 'which resolved to %d ISPs: \'%s\'',
  475. item['key'],
  476. len(remotes),
  477. len(item_isps),
  478. '\', \''.join(item_isps)
  479. )
  480. for isp in item_isps:
  481. if not isp in isps:
  482. isps[isp] = 0
  483. isps[isp] += 1.0 / len(item_isps)
  484. isps_sum = sum([isps[x] for x in isps])
  485. if outputformat == 'csv':
  486. self.__send_headers('text/csv')
  487. self.wfile.write('Count;Name\n')
  488. for isp in isps:
  489. self.wfile.write('{0};"{1}"\n'.format(isps[isp], isp))
  490. elif outputformat == 'json':
  491. self.__send_headers('application/json',
  492. extra={'Content-Disposition': 'inline'})
  493. data = [
  494. {
  495. 'name': isp,
  496. 'count': isps[isp],
  497. 'percentage': isps[isp]*100.0/isps_sum,
  498. 'blocks': [block for block in ispblocks[isp]],
  499. } for isp in isps
  500. ]
  501. self.wfile.write(json.dumps(data))
  502. elif outputformat == 'html':
  503. self.__send_headers()
  504. self.wfile.write('''<!DOCTYPE html>
  505. <html>
  506. <head><title>BATCAVE - PROVIDERS</title></head>
  507. <body>
  508. <table border="2">
  509. <thead>
  510. <tr><th>Count</th><th>Percentage</th><th>Name</th><th>Blocks</th></tr>
  511. </thead>
  512. <tbody>\n''')
  513. for isp in sorted(isps, key=lambda x: isps[x], reverse=True):
  514. self.wfile.write('<tr><td>{0}</td><td>{1:.1f}%</td><td>{2}</td><td>{3}</td></tr>\n'.format(
  515. isps[isp],
  516. isps[isp]*100.0/isps_sum,
  517. isp,
  518. ', '.join(sorted(ispblocks[isp])) if isp in ispblocks else '?',
  519. ))
  520. self.wfile.write('</tbody></table>\n')
  521. self.wfile.write('<p>Totals: {0} ISPs, {1} connections</p>\n'.format(len(isps), isps_sum))
  522. self.wfile.write('</body></html>')
  523. else:
  524. self.send_error(400, 'Unknown output format.')
  525. class ApiServer(ThreadingMixIn, HTTPServer):
  526. def __init__(self, endpoint, storage):
  527. if ':' in endpoint[0]:
  528. self.address_family = socket.AF_INET6
  529. HTTPServer.__init__(self, endpoint, BatcaveHttpRequestHandler)
  530. self.storage = storage
  531. def __str__(self):
  532. return 'ApiServer on {0}'.format(self.server_address)
  533. if __name__ == '__main__':
  534. dummystorage = ffstatus.basestorage.BaseStorage()
  535. server = ApiServer(('0.0.0.0', 8888), dummystorage)
  536. print("Server:", str(server))
  537. server.serve_forever()