server.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import print_function, unicode_literals
  4. from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
  5. import cgi
  6. import json
  7. import logging
  8. import re
  9. import socket
  10. from SocketServer import ThreadingMixIn
  11. import time
  12. import ffstatus
  13. # each match of these regex is removed to normalize an ISP's name
  14. ISP_NORMALIZATIONS = [
  15. # normalize name: strip company indication
  16. re.compile(r'(AG|UG|G?mbH( ?& ?Co\.? ?(OH|K)G)?)$', flags=re.IGNORECASE),
  17. # normalize name: strip "pool" suffixes
  18. re.compile(r'(dynamic )?(customer |subscriber )?(ip )?(pool|(address )?range|addresses)$', flags=re.IGNORECASE),
  19. # normalize name: strip "B2B" and aggregation suffixes
  20. re.compile(r'(aggregate|aggregation)?$', flags=re.IGNORECASE),
  21. re.compile(r'(B2B)?$', flags=re.IGNORECASE),
  22. # normalize name: strip country suffixes (in Germany)
  23. re.compile(r'(' +
  24. 'DE|Deutschland|Germany|' +
  25. 'Nordrhein[- ]Westfalen|NRW|' +
  26. 'Baden[- ]Wuerttemburg|BW|' +
  27. 'Hessen|' +
  28. 'Niedersachsen|' +
  29. 'Rheinland[- ]Pfalz|RLP' +
  30. ')$',
  31. flags=re.IGNORECASE),
  32. ]
  33. REGEX_QUERYPARAM = re.compile(
  34. r'(?P<key>.+?)=(?P<value>.+?)(&|$)')
  35. REGEX_URL_NODEINFO = re.compile(
  36. r'node/(?P<id>[a-fA-F0-9]{12})(?P<cmd>\.json|/[a-zA-Z0-9_\-\.]+)$')
  37. REGEX_URL_NODESTATUS = re.compile(
  38. r'status/([a-f0-9]{12})$')
  39. def normalize_ispname(isp):
  40. """Removes all matches on ISP_NORMALIZATIONS."""
  41. isp = isp.strip()
  42. for regex in ISP_NORMALIZATIONS:
  43. isp = regex.sub('', isp).strip()
  44. return isp
  45. class BatcaveHttpRequestHandler(BaseHTTPRequestHandler):
  46. """Handles a single HTTP request to the BATCAVE."""
  47. def __init__(self, request, client_address, sockserver):
  48. self.logger = logging.getLogger('API')
  49. BaseHTTPRequestHandler.__init__(
  50. self, request, client_address, sockserver)
  51. def __parse_url_pathquery(self):
  52. """Extracts the query parameters from the request path."""
  53. url = re.match(r'^/(?P<path>.*?)(\?(?P<query>.+))?$', self.path.strip())
  54. if url is None:
  55. logging.warn('Failed to parse URL \'' + str(self.path) + '\'.')
  56. return (None, None)
  57. path = url.group('path')
  58. query = {}
  59. if not url.group('query') is None:
  60. for match in REGEX_QUERYPARAM.finditer(url.group('query')):
  61. query[match.group('key')] = match.group('value')
  62. return (path, query)
  63. def do_GET(self):
  64. """Handles all HTTP GET requests."""
  65. path, query = self.__parse_url_pathquery()
  66. if path is None:
  67. self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
  68. return
  69. # / - index page, shows generic help
  70. if path == '':
  71. self.__respond_index(query)
  72. return
  73. # /list - list stored nodes
  74. if path == 'list':
  75. self.__respond_list(query)
  76. return
  77. # /vpn - notification endpoint for gateway's VPN connections
  78. if path == 'vpn':
  79. self.__respond_vpn(query)
  80. return
  81. # /providers
  82. if path == 'providers':
  83. self.__respond_providers(query)
  84. return
  85. # /node/<id>.json - node's data
  86. # /node/<id>/field - return specific field from node's data
  87. match = REGEX_URL_NODEINFO.match(path)
  88. if match is not None:
  89. cmd = match.group('cmd')
  90. nodeid = match.group('id').lower()
  91. if cmd == '.json':
  92. self.__respond_node(nodeid)
  93. else:
  94. self.__respond_nodedetail(nodeid, cmd[1:])
  95. return
  96. # /status/<id> - node's status
  97. match = REGEX_URL_NODESTATUS.match(path)
  98. if match is not None:
  99. self.__respond_nodestatus(match.group(1))
  100. return
  101. # no match -> 404
  102. self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))
  103. def do_POST(self):
  104. """Handles all HTTP POST requests."""
  105. path, query = self.__parse_url_pathquery()
  106. if path is None:
  107. self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
  108. return
  109. params = self.__parse_post_params()
  110. # node id/mac to name mapping
  111. if path == 'idmac2name':
  112. self.__respond_nodeidmac2name(params)
  113. return
  114. # no match -> 404
  115. self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))
  116. def __send_nocache_headers(self):
  117. """
  118. Sets HTTP headers indicating that this response shall not be cached.
  119. """
  120. self.send_header('Cache-Control', 'no-cache, no-store, must-revalidate')
  121. self.send_header('Pragma', 'no-cache')
  122. self.send_header('Expires', '0')
  123. def __send_headers(self,
  124. content_type='text/html; charset=utf-8',
  125. nocache=True, extra={}):
  126. """Send HTTP 200 Response header with the given Content-Type.
  127. Optionally send no-caching headers, too."""
  128. self.send_response(200)
  129. self.send_header('Content-Type', content_type)
  130. if nocache:
  131. self.__send_nocache_headers()
  132. for key in extra:
  133. self.send_header(key, extra[key])
  134. self.end_headers()
  135. def __parse_post_params(self):
  136. ctype, pdict = cgi.parse_header(self.headers.getheader('content-type'))
  137. if ctype == 'multipart/form-data':
  138. postvars = cgi.parse_multipart(self.rfile, pdict)
  139. elif ctype == 'application/x-www-form-urlencoded':
  140. length = int(self.headers.getheader('content-length'))
  141. postvars = cgi.parse_qs(
  142. self.rfile.read(length),
  143. keep_blank_values=1,
  144. )
  145. else:
  146. postvars = {}
  147. return postvars
  148. def __respond_index(self, query):
  149. """Display the index page."""
  150. storage = self.server.storage
  151. self.__send_headers()
  152. self.wfile.write('<!DOCTYPE html><html><head><title>BATCAVE</title></head>\n')
  153. self.wfile.write('<body>\n')
  154. self.wfile.write('<H1 title="Batman/Alfred Transmission Collection, Aggregation & Value Engine">BATCAVE</H1>\n')
  155. self.wfile.write('<p>Dies ist ein interner Hintergrund-Dienst. Er wird nur von anderen Diensten\n')
  156. self.wfile.write('angesprochen und sollte aus einer Mehrzahl von Gr&uuml;nden nicht &ouml;ffentlich\n')
  157. self.wfile.write('zug&auml;nglich sein.</p>\n')
  158. self.wfile.write('<H2>Status</H2>\n')
  159. self.wfile.write('Daten: <span id="datacount" class="value">')
  160. self.wfile.write(len(storage.data))
  161. self.wfile.write('</span>\n')
  162. self.wfile.write('<H2>API</H2>\n')
  163. self.wfile.write('<p>Grundsätzlich ist das Antwort-Format JSON und alle Daten sind Live-Daten (kein Cache) die ggf. etwas Bearbeitungs-Zeit erfordern.</p>')
  164. self.wfile.write('<dl>\n')
  165. self.wfile.write('<dt><a href="/nodes.json">nodes.json</a></dt><dd>zur Verwendung mit ffmap (MACs anonymisiert)</dd>\n')
  166. self.wfile.write('<dt><a href="/node/ff00ff00ff00.json">/node/&lt;id&gt;.json</a></dt><dd><u>alle</u> vorhandenen Information zu der gewünschten Node</dd>\n')
  167. self.wfile.write('</dl>\n')
  168. self.wfile.write('</body></html>')
  169. def __respond_list(self, query):
  170. """List stored data."""
  171. self.__send_headers()
  172. self.wfile.write('<!DOCTYPE html><html>\n')
  173. self.wfile.write('<head><title>BATCAVE</title></head>\n')
  174. self.wfile.write('<body>\n')
  175. self.wfile.write('<H1>BATCAVE - LIST</H1>\n')
  176. self.wfile.write('<table>\n')
  177. self.wfile.write('<thead><tr><th>ID</th><th>Name</th></tr></thead>\n')
  178. self.wfile.write('<tbody>\n')
  179. sortkey = query['sort'] if 'sort' in query else None
  180. data = self.server.storage.get_nodes(sortby=sortkey)
  181. for node in data:
  182. nodeid = node['node_id']
  183. nodename = node['hostname'] if 'hostname' in node else '&lt;?&gt;'
  184. self.wfile.write('<tr>\n')
  185. self.wfile.write(' <td><a href="/node/{0}.json">{0}</a></td>\n'.format(nodeid))
  186. self.wfile.write(' <td>{0}</td>\n'.format(nodename))
  187. self.wfile.write('</tr>\n')
  188. self.wfile.write('</tbody>\n')
  189. self.wfile.write('</table>\n')
  190. def __respond_node(self, rawid):
  191. """Display node data."""
  192. # search node by the given id
  193. node = self.server.storage.find_node(rawid)
  194. # handle unknown nodes
  195. if node is None:
  196. self.send_error(404, 'No node with id \'' + rawid + '\' present.')
  197. return
  198. # dump node data as JSON
  199. self.__send_headers('application/json',
  200. extra={'Content-Disposition': 'inline'})
  201. self.wfile.write(json.dumps(node))
  202. def __respond_nodestatus(self, rawid):
  203. """Display node status."""
  204. status = self.server.storage.get_nodestatus(rawid)
  205. if status is None:
  206. self.send_error(404, 'No node with id \'' + rawid + '\' present.')
  207. self.__send_headers('text/plain')
  208. self.wfile.write(status)
  209. def __respond_nodeidmac2name(self, ids):
  210. """Return a mapping of the given IDs (or MACs) into their hostname."""
  211. self.__send_headers('text/plain')
  212. for nodeid in ids:
  213. node = None
  214. if not ':' in nodeid:
  215. node = self.server.storage.find_node(nodeid)
  216. else:
  217. node = self.server.storage.find_node_by_mac(nodeid)
  218. nodename = node.get('hostname', nodeid) if node is not None else nodeid
  219. self.wfile.write('{0}={1}\n'.format(nodeid, nodename))
  220. def __respond_nodedetail(self, nodeid, field):
  221. """
  222. Return a field from the given node.
  223. String and integers are returned as text/plain,
  224. all other as JSON.
  225. """
  226. node = self.server.storage.find_node(nodeid)
  227. if node is None:
  228. self.send_error(404, 'No node with id \'' + nodeid + '\' present.')
  229. return
  230. return_count = False
  231. if field.endswith('.count'):
  232. return_count = True
  233. field = field[0:-6]
  234. if not field in node:
  235. self.send_error(
  236. 404,
  237. 'The node \'{0}\' does not have a field named \'{1}\'.'.format(
  238. nodeid, field
  239. )
  240. )
  241. return
  242. value = node[field]
  243. if return_count:
  244. value = len(value)
  245. no_json = isinstance(value, basestring) or isinstance(value, int)
  246. self.__send_headers('text/plain' if no_json else 'application/json',
  247. extra={'Content-Disposition': 'inline'})
  248. self.wfile.write(value if no_json else json.dumps(value))
  249. def __respond_vpn(self, query):
  250. storage = self.server.storage
  251. peername = query.get('peer')
  252. key = query.get('key')
  253. action = query.get('action')
  254. remote = query.get('remote')
  255. gateway = query.get('gw')
  256. timestamp = query.get('ts', time.time())
  257. if action == 'list':
  258. self.__respond_vpnlist()
  259. return
  260. if action != 'establish' and action != 'disestablish':
  261. self.logger.error('VPN: unknown action \'{0}\''.format(action))
  262. self.send_error(400, 'Invalid action.')
  263. return
  264. check = {
  265. 'peername': peername,
  266. 'key': key,
  267. 'remote': remote,
  268. 'gw': gateway,
  269. }
  270. for k, val in check.items():
  271. if val is None or len(val.strip()) == 0:
  272. self.logger.error('VPN {0}: no or empty {1}'.format(action, k))
  273. self.send_error(400, 'Missing value for ' + str(k))
  274. return
  275. try:
  276. if action == 'establish':
  277. self.server.storage.log_vpn_connect(
  278. key, peername, remote, gateway, timestamp)
  279. elif action == 'disestablish':
  280. self.server.storage.log_vpn_connect(key, gateway, timestamp)
  281. else:
  282. self.logger.error('Unknown VPN action \'%s\' not filtered.',
  283. action)
  284. self.send_error(500)
  285. return
  286. except ffstatus.exceptions.VpnKeyFormatError:
  287. self.logger.error('VPN peer \'{0}\' {1}: bad key \'{2}\''.format(
  288. peername, action, key,
  289. ))
  290. self.send_error(400, 'Bad key.')
  291. return
  292. self.__send_headers('text/plain')
  293. self.wfile.write('OK')
  294. storage.save()
  295. def __respond_vpnlist(self):
  296. self.__send_headers()
  297. self.wfile.write('<!DOCTYPE html>\n')
  298. self.wfile.write('<html><head><title>BATCAVE - VPN LIST</title></head>\n')
  299. self.wfile.write('<body>\n')
  300. self.wfile.write('<style type="text/css">\n')
  301. self.wfile.write('table { border: 2px solid #999; border-collapse: collapse; }\n')
  302. self.wfile.write('th, td { border: 1px solid #CCC; }\n')
  303. self.wfile.write('table tbody tr.online { background-color: #CFC; }\n')
  304. self.wfile.write('table tbody tr.offline { background-color: #FCC; }\n')
  305. self.wfile.write('</style>\n')
  306. self.wfile.write('<table>\n')
  307. gateways = self.server.storage.get_vpn_gateways()
  308. self.wfile.write('<thead>\n')
  309. self.wfile.write('<tr><th rowspan="2">names (key)</th><th colspan="' + str(len(gateways)) + '">active</th><th colspan="' + str(len(gateways)) + '">last</th></tr>\n')
  310. self.wfile.write('<tr><th>' + '</th><th>'.join(gateways) + '</th><th>' + '</th><th>'.join(gateways) + '</th></tr>\n')
  311. self.wfile.write('</thead>\n')
  312. for item in self.server.storage.get_vpn_connections():
  313. self.wfile.write('<tr class="{0}">'.format('online' if item['online'] else 'offline'))
  314. self.wfile.write('<td title="{0}">{1}</td>'.format(
  315. item['key'],
  316. ' / '.join(item['names']) if len(item['names']) > 0 else '?',
  317. ))
  318. for conntype in ['active', 'last']:
  319. for gateway in gateways:
  320. remote = ''
  321. if conntype in item['remote'] and gateway in item['remote'][conntype]:
  322. remote = item['remote'][conntype][gateway]
  323. if isinstance(remote, dict):
  324. remote = remote['name']
  325. symbol = '&check;' if len(remote) > 0 else '&times;'
  326. self.wfile.write('<td title="{0}">{1}</td>'.format(
  327. remote, symbol))
  328. self.wfile.write('</tr>\n')
  329. self.wfile.write('</table>\n')
  330. self.wfile.write('</body>')
  331. self.wfile.write('</html>')
  332. def __respond_providers(self, query):
  333. """Return a summary of providers."""
  334. outputformat = query['format'].lower() if 'format' in query else 'html'
  335. isps = {}
  336. ispblocks = {}
  337. for item in self.server.storage.get_vpn_connections():
  338. if item['count']['active'] == 0:
  339. continue
  340. remotes = []
  341. for gateway in item['remote']['active']:
  342. remote = item['remote']['active'][gateway]
  343. remotes.append(remote)
  344. if len(remotes) == 0:
  345. self.logger.warn(
  346. 'VPN key \'%s\' is marked with active remotes but 0 found?',
  347. item['key'])
  348. continue
  349. item_isps = set()
  350. for remote in remotes:
  351. isp = "UNKNOWN"
  352. ispblock = remote
  353. if isinstance(remote, dict):
  354. ispblock = remote['name']
  355. desc_lines = remote['description'].split('\n')
  356. isp = normalize_ispname(desc_lines[0])
  357. if not isp in ispblocks:
  358. ispblocks[isp] = set()
  359. ispblocks[isp].add(ispblock)
  360. item_isps.add(isp)
  361. if len(item_isps) == 0:
  362. item_isps.add('unknown')
  363. elif len(item_isps) > 1:
  364. self.logger.warn(
  365. 'VPN key \'%s\' has %d active IPs which resolved to %d ISPs: \'%s\'',
  366. item['key'],
  367. len(remotes),
  368. len(item_isps),
  369. '\', \''.join(item_isps)
  370. )
  371. for isp in item_isps:
  372. if not isp in isps:
  373. isps[isp] = 0
  374. isps[isp] += 1.0 / len(item_isps)
  375. isps_sum = sum([isps[x] for x in isps])
  376. if outputformat == 'csv':
  377. self.__send_headers('text/csv')
  378. self.wfile.write('Count;Name\n')
  379. for isp in isps:
  380. self.wfile.write('{0};"{1}"\n'.format(isps[isp], isp))
  381. elif outputformat == 'json':
  382. self.__send_headers('application/json',
  383. extra={'Content-Disposition': 'inline'})
  384. data = [
  385. {
  386. 'name': isp,
  387. 'count': isps[isp],
  388. 'percentage': isps[isp]*100.0/isps_sum,
  389. 'blocks': [block for block in ispblocks[isp]],
  390. } for isp in isps
  391. ]
  392. self.wfile.write(json.dumps(data))
  393. elif outputformat == 'html':
  394. self.send_headers()
  395. self.wfile.write('<!DOCTYPE html><html>\n')
  396. self.wfile.write('<head><title>BATCAVE - PROVIDERS</title></head>\n')
  397. self.wfile.write('<body>\n')
  398. self.wfile.write('<table border="2">\n')
  399. self.wfile.write('<thead><tr><th>Count</th><th>Percentage</th><th>Name</th><th>Blocks</th></tr></thead>\n')
  400. self.wfile.write('<tbody>\n')
  401. for isp in sorted(isps, key=lambda x: isps[x], reverse=True):
  402. self.wfile.write('<tr><td>{0}</td><td>{1:.1f}%</td><td>{2}</td><td>{3}</td></tr>\n'.format(
  403. isps[isp],
  404. isps[isp]*100.0/isps_sum,
  405. isp,
  406. ', '.join(sorted(ispblocks[isp])) if isp in ispblocks else '?',
  407. ))
  408. self.wfile.write('</tbody></table>\n')
  409. self.wfile.write('<p>Totals: {0} ISPs, {1} connections</p>\n'.format(len(isps), isps_sum))
  410. self.wfile.write('</body></html>')
  411. else:
  412. self.send_error(400, 'Unknown output format.')
  413. class ApiServer(ThreadingMixIn, HTTPServer):
  414. def __init__(self, endpoint, storage):
  415. if ':' in endpoint[0]:
  416. self.address_family = socket.AF_INET6
  417. HTTPServer.__init__(self, endpoint, BatcaveHttpRequestHandler)
  418. self.storage = storage
  419. def __str__(self):
  420. return 'ApiServer on {0}'.format(self.server_address)
  421. if __name__ == '__main__':
  422. dummystorage = ffstatus.basestorage.BaseStorage()
  423. server = ApiServer(('0.0.0.0', 8888), dummystorage)
  424. print("Server:", str(server))
  425. server.serve_forever()