123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607 |
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- from __future__ import print_function, unicode_literals
- from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
- import cgi
- import json
- import logging
- import re
- import socket
- from SocketServer import ThreadingMixIn
- import time
- import ffstatus
# Each match of these regexes is removed to normalize an ISP's name.
# All patterns are anchored at the end of the string and are applied in
# list order by normalize_ispname().
ISP_NORMALIZATIONS = [
    # strip the company form ("AG", "UG", "GmbH", "GmbH & Co. KG", ...)
    re.compile(r'(AG|UG|G?mbH( ?& ?Co\.? ?(OH|K)G)?)$', flags=re.IGNORECASE),

    # strip "pool" suffixes ("dynamic customer ip pool", "address range", ...)
    re.compile(
        r'(dynamic )?(customer |subscriber )?(ip )?'
        r'(pool|(address )?range|addresses)$',
        flags=re.IGNORECASE),

    # strip "B2B" and aggregation suffixes
    re.compile(r'(aggregate|aggregation)?$', flags=re.IGNORECASE),
    re.compile(r'(B2B)?$', flags=re.IGNORECASE),

    # strip country and federal state suffixes (in Germany)
    re.compile(
        r'('
        r'DE|Deutschland|Germany|'
        r'Nordrhein[- ]Westfalen|NRW|'
        # "[eu]" accepts the correct spelling "Wuerttemberg" and still
        # matches the misspelled "Wuerttemburg" that was listed before.
        r'Baden[- ]Wuerttemb[eu]rg|BW|'
        r'Hessen|'
        r'Niedersachsen|'
        r'Rheinland[- ]Pfalz|RLP'
        r')$',
        flags=re.IGNORECASE),
]
# One "key=value" pair of a URL query string.
# Key and value must not run across a '&' separator; otherwise a pair with
# an empty value ("a=&b=1") or a bare flag ("flag&x=1") would swallow the
# parameter that follows it.  Pairs with an empty value are not matched.
REGEX_QUERYPARAM = re.compile(
    r'(?P<key>[^&=]+)=(?P<value>[^&]+)(&|$)')

# "node/<id>.json" or "node/<id>/<field>"; <id> is 12 hex digits in either
# case, "cmd" is the ".json" suffix or the "/<field>" part.
REGEX_URL_NODEINFO = re.compile(
    r'node/(?P<id>[a-fA-F0-9]{12})(?P<cmd>\.json|/[a-zA-Z0-9_\-\.]+)$')

# "status/<id>"; <id> is 12 lower-case hex digits (group 1).
REGEX_URL_NODESTATUS = re.compile(
    r'status/([a-f0-9]{12})$')
def normalize_ispname(isp):
    """Return the ISP name with all ISP_NORMALIZATIONS matches removed.

    The patterns are applied once each, in list order; surrounding
    whitespace is stripped before the first and after every substitution.
    """
    name = isp.strip()
    for pattern in ISP_NORMALIZATIONS:
        stripped = pattern.sub('', name)
        name = stripped.strip()
    return name
class BatcaveHttpRequestHandler(BaseHTTPRequestHandler):
    """Handles a single HTTP request to the BATCAVE.

    GET requests are routed by do_GET(), POST requests by do_POST().  All
    data is read from ``self.server.storage``.  Response bodies are sent
    through __write() so that text leaves as UTF-8, and every value that is
    placed into an HTML page goes through __escape_html().
    """

    def __init__(self, request, client_address, sockserver):
        """Create the API logger, then let the base class serve the request."""
        # The logger has to exist before the base constructor runs, because
        # that constructor already dispatches the request to do_GET/do_POST.
        self.logger = logging.getLogger('API')
        BaseHTTPRequestHandler.__init__(
            self, request, client_address, sockserver)

    @staticmethod
    def __escape_html(value):
        """Return value as text that is safe in HTML text and attributes."""
        text = '{0}'.format(value)
        # '&' must be replaced first so the entities added below stay intact.
        for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                             ('"', '&quot;'), ("'", '&#39;')):
            text = text.replace(char, entity)
        return text

    def __write(self, data):
        """Write data to the client; anything but bytes is sent as UTF-8."""
        if not isinstance(data, bytes):
            data = '{0}'.format(data).encode('utf-8')
        self.wfile.write(data)

    def __parse_url_pathquery(self):
        """Split the request path into (path, query).

        Returns the path without its leading slash and a dict of the query
        parameters, or (None, None) if the URL cannot be parsed.  Parameter
        values are taken verbatim (no URL-decoding is done here).
        """
        url = re.match(r'^/(?P<path>.*?)(\?(?P<query>.+))?$', self.path.strip())
        if url is None:
            self.logger.warning('Failed to parse URL \'' + str(self.path) + '\'.')
            return (None, None)

        query = {}
        if url.group('query') is not None:
            for match in REGEX_QUERYPARAM.finditer(url.group('query')):
                query[match.group('key')] = match.group('value')
        return (url.group('path'), query)

    def do_GET(self):
        """Handles all HTTP GET requests."""
        path, query = self.__parse_url_pathquery()
        if path is None:
            self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
            return

        # endpoints with a fixed path whose handler takes the query dict
        query_routes = {
            '': self.__respond_index,              # index page, generic help
            'nodes.json': self.__respond_nodes,
            'list': self.__respond_list,            # list stored nodes
            'vpn': self.__respond_vpn,              # gateways' VPN notifications
            'providers': self.__respond_providers,
        }
        handler = query_routes.get(path)
        if handler is not None:
            handler(query)
            return

        # /node/<id>.json - node's data
        # /node/<id>/field - return specific field from node's data
        match = REGEX_URL_NODEINFO.match(path)
        if match is not None:
            cmd = match.group('cmd')
            nodeid = match.group('id').lower()
            if cmd == '.json':
                self.__respond_node(nodeid)
            else:
                # drop the leading '/' of "/<field>"
                self.__respond_nodedetail(nodeid, cmd[1:])
            return

        # /status - overall status (incl. node and client count)
        if path == 'status':
            self.__respond_status()
            return

        # /status/<id> - node's status
        match = REGEX_URL_NODESTATUS.match(path)
        if match is not None:
            self.__respond_nodestatus(match.group(1))
            return

        # no match -> 404
        self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))

    def do_POST(self):
        """Handles all HTTP POST requests."""
        path, query = self.__parse_url_pathquery()
        if path is None:
            self.send_error(400, 'Could not parse URL (' + str(self.path) + ')')
            return
        params = self.__parse_post_params()

        # node id/mac to name mapping
        if path == 'idmac2name':
            self.__respond_nodeidmac2name(params)
            return

        # no match -> 404
        self.send_error(404, 'The URL \'{0}\' was not found here.'.format(path))

    def __send_nocache_headers(self):
        """
        Sets HTTP headers indicating that this response shall not be cached.
        """
        self.send_header('Cache-Control', 'no-cache, no-store, must-revalidate')
        self.send_header('Pragma', 'no-cache')
        self.send_header('Expires', '0')

    def __send_headers(self,
                       content_type='text/html; charset=utf-8',
                       nocache=True, extra=None):
        """Send HTTP 200 Response header with the given Content-Type.

        Optionally send no-caching headers, too.  ``extra`` is an optional
        dict of additional header names and values.
        """
        self.send_response(200)
        self.send_header('Content-Type', content_type)
        if nocache:
            self.__send_nocache_headers()
        # "extra" defaults to None instead of a shared mutable dict
        for key, value in (extra or {}).items():
            self.send_header(key, value)
        self.end_headers()

    def __parse_post_params(self):
        """Parse the POST body into a dict of form fields.

        Supports multipart/form-data and application/x-www-form-urlencoded;
        any other (or a missing) Content-Type yields an empty dict.
        """
        ctype, pdict = cgi.parse_header(self.headers.get('content-type', ''))
        if ctype == 'multipart/form-data':
            return cgi.parse_multipart(self.rfile, pdict)
        if ctype == 'application/x-www-form-urlencoded':
            try:
                length = int(self.headers.get('content-length', 0))
            except ValueError:
                length = 0
            # never pass a negative size, read() would then wait for EOF
            return cgi.parse_qs(
                self.rfile.read(max(length, 0)),
                keep_blank_values=1,
            )
        return {}

    def __respond_index(self, query):
        """Display the index page."""
        self.__send_headers()
        # "<id>" and "&" are written as entities so that browsers display
        # them instead of parsing them as markup.
        index_page = '''<!DOCTYPE html>
<html><head><title>BATCAVE</title></head>
<body>
<H1 title="Batman/Alfred Transmission Collection, Aggregation &amp; Value Engine">
BATCAVE
</H1>
<p>Dies ist ein interner Hintergrund-Dienst. Er wird nur von anderen Diensten
angesprochen und sollte aus einer Mehrzahl von Gründen nicht
öffentlich zugänglich sein.</p>
<H2>API</H2>
<p>
Grundsätzlich ist das Antwort-Format JSON und alle Daten sind
Live-Daten (kein Cache) die ggf. etwas Bearbeitungs-Zeit erfordern.
</p>
<dl>
<dt>GET <a href="/nodes.json">nodes.json</a></dt>
<dd>zur Verwendung mit ffmap (MACs anonymisiert)</dd>
<dt>GET /node/&lt;id&gt;.json</dt>
<dd>alle Daten zu dem gewünschten Knoten</dd>
<dt>GET /providers?format=json</dt>
<dd>Liste der Provider</dd>
<dt>GET <a href="/status">/status</a></dt>
<dd>Status der BATCAVE inkl. Zahl der Nodes+Clients (JSON)</dd>
<dt>GET /status/&lt;id&gt;</dt>
<dd>Status des Knotens</dd>
</dl>
</body></html>'''
        self.__write(index_page)

    def __respond_list(self, query):
        """List stored data as an HTML table (optionally sorted by ?sort=)."""
        self.__send_headers()
        parts = [
            '<!DOCTYPE html><html>\n',
            '<head><title>BATCAVE</title></head>\n',
            '<body>\n',
            '<H1>BATCAVE - LIST</H1>\n',
            '<table>\n',
            '<thead><tr><th>ID</th><th>Name</th></tr></thead>\n',
            '<tbody>\n',
        ]

        data = self.server.storage.get_nodes(sortby=query.get('sort'))
        for node in data:
            # node data is not trusted, so it is escaped before it is put
            # into the page
            nodeid = self.__escape_html(node.get('node_id'))
            nodename = self.__escape_html(node.get('hostname', '<?>'))
            parts.append('<tr>\n')
            parts.append(
                ' <td><a href="/node/{0}.json">{0}</a></td>\n'.format(nodeid))
            parts.append(' <td>{0}</td>\n'.format(nodename))
            parts.append('</tr>\n')

        parts.append('</tbody>\n')
        parts.append('</table>\n')
        parts.append('</body></html>\n')
        self.__write(''.join(parts))

    def __map_item(self, haystack, needle, prefix=None):
        """Return the alias stored for needle, creating one if necessary.

        ``haystack`` is a dict that maps each needle to its alias and is
        updated in place.  A new alias is ``prefix`` followed by a number.
        Raises TypeError if haystack is not a dict.
        """
        if not isinstance(haystack, dict):
            raise TypeError("haystack must be a dict")
        if needle in haystack:
            return haystack[needle]

        # without a prefix the alias is just the number
        prefix = '' if prefix is None else prefix
        idx = len(haystack) + 1
        name = prefix + str(idx)
        # skip candidates that are themselves used as a key
        while name in haystack:
            idx += 1
            name = prefix + str(idx)
        haystack[needle] = name
        return name

    def __respond_nodes(self, query):
        """Return all nodes as JSON; clients are replaced by aliases."""
        indent = 2 if query.get('pretty', 0) == '1' else None
        nodes = []
        clientmapping = {}
        for node in self.server.storage.get_nodes():
            entry = {
                'id': node.get('node_id'),
                'name': node.get('hostname'),
                'clients': [self.__map_item(clientmapping, x, "c")
                            for x in node.get('clients', [])],
            }
            # only add the position if both coordinates are present
            geo = node.get('location', None)
            if isinstance(geo, dict) and \
                    'latitude' in geo and 'longitude' in geo:
                entry['geo'] = [geo['latitude'], geo['longitude']]
            nodes.append(entry)

        result = {'nodes': nodes}
        self.__send_headers(content_type='application/json',
                            extra={'Content-Disposition': 'inline'})
        self.__write(json.dumps(result, indent=indent))

    def __respond_node(self, rawid):
        """Display node data."""
        # search node by the given id
        node = self.server.storage.find_node(rawid)

        # handle unknown nodes
        if node is None:
            self.send_error(404, 'No node with id \'' + rawid + '\' present.')
            return

        # dump node data as JSON
        self.__send_headers('application/json',
                            extra={'Content-Disposition': 'inline'})
        self.__write(json.dumps(node))

    def __respond_nodestatus(self, rawid):
        """Display node status."""
        status = self.server.storage.get_nodestatus(rawid)
        if status is None:
            self.send_error(404, 'No node with id \'' + rawid + '\' present.')
            # stop here, the error response has already been sent
            return

        self.__send_headers('text/plain')
        self.__write(status)

    def __respond_nodeidmac2name(self, ids):
        """Return a mapping of the given IDs (or MACs) into their hostname."""
        self.__send_headers('text/plain')
        storage = self.server.storage
        for nodeid in ids:
            # anything containing a colon is treated as a MAC address
            if ':' in nodeid:
                node = storage.find_node_by_mac(nodeid)
            else:
                node = storage.find_node(nodeid)
            nodename = node.get('hostname', nodeid) if node is not None else nodeid
            self.__write('{0}={1}\n'.format(nodeid, nodename))

    def __respond_nodedetail(self, nodeid, field):
        """
        Return a field from the given node.
        String and integers are returned as text/plain,
        all other as JSON.
        A field name ending in ".count" returns the length of that field.
        """
        node = self.server.storage.find_node(nodeid)
        if node is None:
            self.send_error(404, 'No node with id \'' + nodeid + '\' present.')
            return

        return_count = field.endswith('.count')
        if return_count:
            field = field[0:-6]

        if field not in node:
            self.send_error(
                404,
                'The node \'{0}\' does not have a field named \'{1}\'.'.format(
                    nodeid, field
                )
            )
            return

        value = node[field]
        if return_count:
            try:
                value = len(value)
            except TypeError:
                # e.g. a number: there is nothing to count
                self.send_error(
                    400, 'The field \'{0}\' cannot be counted.'.format(field))
                return

        # bytes and type('') together cover every string type
        no_json = isinstance(value, (bytes, type(''), int))
        self.__send_headers('text/plain' if no_json else 'application/json',
                            extra={'Content-Disposition': 'inline'})
        self.__write(value if no_json else json.dumps(value))

    def __respond_status(self):
        """Return the storage's status as JSON."""
        status = self.server.storage.status
        self.__send_headers('application/json',
                            extra={'Content-Disposition': 'inline'})
        self.__write(json.dumps(status, indent=2))

    def __respond_vpn(self, query):
        """Handle a gateway's VPN notification (or list the connections).

        The action is taken from the query: "list" shows the overview,
        "establish" and "disestablish" are logged to the storage.
        """
        storage = self.server.storage
        action = query.get('action')

        if action == 'list':
            self.__respond_vpnlist()
            return

        if action not in ('establish', 'disestablish'):
            self.logger.error('VPN: unknown action \'{0}\''.format(action))
            self.send_error(400, 'Invalid action.')
            return

        peername = query.get('peer')
        key = query.get('key')
        remote = query.get('remote')
        gateway = query.get('gw')
        # NOTE(review): a 'ts' from the query is a string while the default
        # is a float - confirm which type the storage expects.
        timestamp = query.get('ts', time.time())

        # checked in a fixed order so the same value is reported every time
        check = (
            ('peername', peername),
            ('key', key),
            ('remote', remote),
            ('gw', gateway),
        )
        for k, val in check:
            if val is None or not val.strip():
                self.logger.error('VPN {0}: no or empty {1}'.format(action, k))
                self.send_error(400, 'Missing value for ' + str(k))
                return

        try:
            if action == 'establish':
                storage.log_vpn_connect(
                    key, peername, remote, gateway, timestamp)
            else:
                storage.log_vpn_disconnect(key, gateway, timestamp)
        except ffstatus.exceptions.VpnKeyFormatError:
            self.logger.error('VPN peer \'{0}\' {1}: bad key \'{2}\''.format(
                peername, action, key,
            ))
            self.send_error(400, 'Bad key.')
            return

        self.__send_headers('text/plain')
        self.__write('OK')

        storage.save()

    def __respond_vpnlist(self):
        """Display all VPN connections per gateway as an HTML table."""
        self.__send_headers()
        self.__write('''<!DOCTYPE html>
<html><head><title>BATCAVE - VPN LIST</title></head>
<body>
<style type="text/css">
table { border: 2px solid #999; border-collapse: collapse; }
th, td { border: 1px solid #CCC; }
table tbody tr.online { background-color: #CFC; }
table tbody tr.offline { background-color: #FCC; }
</style>
<table>''')

        escape = self.__escape_html
        gateways = self.server.storage.get_vpn_gateways()
        gws_header = ''.join(
            '<th>' + escape(gateway) + '</th>' for gateway in gateways)
        self.__write('<thead>\n')
        self.__write('<tr><th rowspan="2">names (key)</th>')
        self.__write('<th colspan="' + str(len(gateways)) + '">active</th>')
        self.__write('<th colspan="' + str(len(gateways)) + '">last</th>')
        self.__write('</tr>\n')
        # one gateway column set for "active" and one for "last"
        self.__write('<tr>' + gws_header + gws_header + '</tr>\n')
        self.__write('</thead>\n')

        for item in self.server.storage.get_vpn_connections():
            row_class = 'online' if item['online'] else 'offline'
            self.__write('<tr class="{0}">'.format(row_class))
            self.__write('<td title="{0}">{1}</td>'.format(
                escape(item['key']),
                escape(' / '.join(item['names'])) if item['names'] else '?',
            ))

            for conntype in ['active', 'last']:
                for gateway in gateways:
                    remote = ''
                    if conntype in item['remote'] and \
                            gateway in item['remote'][conntype]:
                        remote = item['remote'][conntype][gateway]
                        # a remote is either a plain value or a dict that
                        # carries its display name
                        if isinstance(remote, dict):
                            remote = remote['name']

                    symbol = '✓' if len(remote) > 0 else '×'
                    self.__write('<td title="{0}">{1}</td>'.format(
                        escape(remote), symbol))
            self.__write('</tr>\n')

        self.__write('</table>\n')
        self.__write('</body>')
        self.__write('</html>')

    def __respond_providers(self, query):
        """Return a summary of providers.

        The output format is chosen with ?format=html|csv|json (default:
        html).  Every VPN key with active connections counts as one
        connection; if its remotes resolve to several ISPs, that one
        connection is split evenly between them.
        """
        outputformat = query.get('format', 'html').lower()
        if outputformat not in ('csv', 'json', 'html'):
            self.send_error(400, 'Unknown output format.')
            return

        isps = {}
        ispblocks = {}
        for item in self.server.storage.get_vpn_connections():
            if item['count']['active'] == 0:
                continue

            remotes = list(item['remote']['active'].values())
            if not remotes:
                self.logger.warning(
                    'VPN key \'%s\' is marked with active remotes but 0 found?',
                    item['key'])
                continue

            # never empty: each remote adds exactly one entry below
            item_isps = set()
            for remote in remotes:
                isp = "UNKNOWN"
                ispblock = remote
                if isinstance(remote, dict):
                    ispblock = remote['name']
                    # the ISP name is the first line of the description; a
                    # missing or fully stripped name counts as unknown
                    description = remote.get('description') or ''
                    isp = normalize_ispname(
                        description.split('\n')[0]) or "UNKNOWN"
                ispblocks.setdefault(isp, set()).add(ispblock)
                item_isps.add(isp)

            if len(item_isps) > 1:
                self.logger.warning(
                    'VPN key \'%s\' has %d active IPs ' +
                    'which resolved to %d ISPs: \'%s\'',
                    item['key'],
                    len(remotes),
                    len(item_isps),
                    '\', \''.join(item_isps)
                )

            for isp in item_isps:
                isps[isp] = isps.get(isp, 0) + 1.0 / len(item_isps)

        isps_sum = sum(isps.values())

        if outputformat == 'csv':
            self.__send_headers('text/csv')
            self.__write('Count;Name\n')
            for isp in isps:
                # quotes inside a quoted CSV field are doubled
                self.__write('{0};"{1}"\n'.format(
                    isps[isp], isp.replace('"', '""')))

        elif outputformat == 'json':
            self.__send_headers('application/json',
                                extra={'Content-Disposition': 'inline'})
            data = [
                {
                    'name': isp,
                    'count': isps[isp],
                    'percentage': isps[isp]*100.0/isps_sum,
                    'blocks': sorted(ispblocks[isp]),
                } for isp in isps
            ]
            self.__write(json.dumps(data))

        else:
            escape = self.__escape_html
            self.__send_headers()
            self.__write('''<!DOCTYPE html>
<html>
<head><title>BATCAVE - PROVIDERS</title></head>
<body>
<table border="2">
<thead>
<tr><th>Count</th><th>Percentage</th><th>Name</th><th>Blocks</th></tr>
</thead>
<tbody>\n''')

            # biggest provider first
            for isp in sorted(isps, key=lambda x: isps[x], reverse=True):
                blocks = ispblocks.get(isp)
                self.__write('<tr><td>{0}</td><td>{1:.1f}%</td><td>{2}</td><td>{3}</td></tr>\n'.format(
                    isps[isp],
                    isps[isp]*100.0/isps_sum,
                    escape(isp),
                    escape(', '.join(sorted(blocks))) if blocks is not None else '?',
                ))

            self.__write('</tbody></table>\n')
            self.__write('<p>Totals: {0} ISPs, {1} connections</p>\n'.format(len(isps), isps_sum))
            self.__write('</body></html>')
class ApiServer(ThreadingMixIn, HTTPServer):
    """HTTP server for the BATCAVE API that handles each request in a thread.

    Requests are served by BatcaveHttpRequestHandler; the given storage is
    available to the handlers as ``server.storage``.
    """

    def __init__(self, endpoint, storage):
        """Bind to endpoint, a (host, port) tuple, and keep the storage."""
        host = endpoint[0]
        # a colon in the host marks an IPv6 address; the address family has
        # to be chosen before the base constructor creates the socket
        if ':' in host:
            self.address_family = socket.AF_INET6
        HTTPServer.__init__(self, endpoint, BatcaveHttpRequestHandler)
        self.storage = storage

    def __str__(self):
        """Return a short description including the bound address."""
        return 'ApiServer on %s' % (self.server_address,)
if __name__ == '__main__':
    # Stand-alone run for manual testing: serve a fresh BaseStorage
    # on port 8888 of all IPv4 interfaces until the process is stopped.
    api_server = ApiServer(
        ('0.0.0.0', 8888),
        ffstatus.basestorage.BaseStorage(),
    )
    print("Server:", api_server)
    api_server.serve_forever()
|