#!/usr/bin/python3
#
# Check state of BGP sessions in Bird Internet Routing Daemon
#
# Maximilian Wilhelm
#  -- Thu 13 Apr 2017 12:04:13 PM CEST
#

import argparse
import os
import re
import subprocess
import sys

# Parsed command line options. Populated by main(); validate_range_arg()
# falls back to this namespace when no explicit one is passed.
args = None

# Exit code and label used when a warning / critical RANGE matches.
session_down_codes = {
    'warn': [1, 'WARNING'],
    'crit': [2, 'CRITICAL'],
}

# Option name prefix -> label used in the route limit messages.
route_codes = {
    'routes_exported': 'Exported',
    'routes_imported': 'Imported',
    'routes_preferred': 'Preferred',
}

# bird control client per IP protocol version.
cmds = {
    '4': '/usr/sbin/birdc',
    '6': '/usr/sbin/birdc6',
}

# Example of the bird output which is parsed below:
#
# cr03_in_ffho_net BGP      master   up     2017-04-06  Established
#   Preference:     100
#   Input filter:   ibgp_in
#   Output filter:  ibgp_out
#   Routes:         38 imported, 3 exported, 1 preferred
#   Route change stats:     received   rejected   filtered    ignored   accepted
#     Import updates:          16779          0          0         72      16707
#     Import withdraws:        18012          0        ---       1355      16657
#     Export updates:          55104      18903      24743        ---      11458
#     Export withdraws:         9789        ---        ---        ---      11455
#   BGP state:          Established
#     Neighbor address: 10.132.255.3
#     Neighbor AS:      65132
#     Neighbor ID:      10.132.255.3
#     Neighbor caps:    refresh enhanced-refresh restart-able AS4
#     Session:          internal multihop AS4
#     Source address:   10.132.255.12
#     Hold timer:       198/240
#     Keepalive timer:  13/80

# Simple fields with only one value
simple_fields = [
    'Preference',
    'Input filter',
    'Output filter',
    'BGP state',
    'Neighbor address',
    'Neighbor AS',
    'Neighbor ID',
    'Source address',
    'Hold timer',
    'Keepalive timer',
    'Last error',
]

# More "complex" fields
fields = {
    'Routes': {
        # NOTE(review): the optional "N filtered" part is tolerated because
        # bird presumably prints it when filtered routes are kept -- confirm
        # against the deployed bird version. Group numbering is unchanged.
        're': re.compile(r'Routes:\s+(\d+) imported, (?:\d+ filtered, )?(\d+) exported, (\d+) preferred'),
        'groups': [1, 2, 3],
        'mangle_dict': {
            'Routes imported': 1,
            'Routes exported': 2,
            'Routes preferred': 3,
        },
    },
    'Neighbor caps': {
        're': re.compile(r'Neighbor caps:\s+(.+)$'),
        'groups': [1],
        'list': True,
        'split': lambda x: x.split(),
    },
    'Session': {
        're': re.compile(r'Session:\s+(.+)$'),
        'groups': [1],
        'list': True,
        'split': lambda x: x.split(),
    },
}

# Generate entries for simple fields
for field in simple_fields:
    fields[field] = {
        're': re.compile(r'^\s*%s:\s+(.+)$' % field),
        'groups': [1],
    }

# First line of a BGP protocol block: "<name> BGP ..."
proto_re = re.compile(r'^([0-9a-zA-Z_.-]+)\s+BGP\s+')

# Preamble ("BIRD x.y ready.", table header) or empty line
ignore_re = re.compile(r'^(BIRD [0-9.]+ ready.|name\s+proto\s+table\s+.*)?$')


def read_sessions_from_file(file_path, missing_ok):
    """Return the session names listed in *file_path*, one per line.

    Empty lines and lines whose first non-blank character is '#' are skipped.

    :param file_path: path of the file to read.
    :param missing_ok: if true, a non-existent file yields an empty list.
    :returns: list of session names with surrounding whitespace removed.

    Prints a message and exits with code 1 if the file cannot be read.
    """
    sessions = []

    # If we shouldn't care, we won't care if it's not there.
    if missing_ok and not os.path.isfile(file_path):
        return sessions

    try:
        # Read the file we were asked to read, not whatever the global
        # command line namespace happens to contain.
        with open(file_path, 'r') as fh:
            for line in fh:
                name = line.strip()
                if name and not name.startswith('#'):
                    sessions.append(name)
    except IOError as err:
        print("Failed to read sessions_down_ok from '%s': %s" % (file_path, err.strerror or err))
        sys.exit(1)

    return sessions


def validate_range_arg(arg_name, namespace=None):
    """Validate the RANGE option *arg_name* ("<low>:<high>", either side may be empty).

    :param arg_name: attribute name of the option in the namespace.
    :param namespace: object holding the option values; defaults to the
        module level ``args`` set by main().
    :returns: None if the option is unset/empty or valid, otherwise an
        error message string.
    """
    if namespace is None:
        namespace = args

    value = getattr(namespace, arg_name)
    if not value:
        return None

    # Check if a RANGE was given
    limits = value.split(':')
    if len(limits) != 2:
        return "Error: Invalid value for --%s, expected RANGE: %s" % (arg_name, value)

    # Parse the limits to integers if present, one limit might be empty
    try:
        low = int(limits[0]) if limits[0] != '' else None
        high = int(limits[1]) if limits[1] != '' else None
    except ValueError:
        return "Error: Expected numeric values in RANGE for --%s: %s" % (arg_name, value)

    # Validate range if both values were given
    if low is not None and high is not None and low > high:
        return "Error: Invalid value for --%s, invalid RANGE: %s" % (arg_name, value)

    return None


def _in_range(value, range_spec):
    """Return True if integer *value* lies within the (validated) RANGE string."""
    low, high = range_spec.split(':')
    return (low == '' or value >= int(low)) and (high == '' or value <= int(high))


def _build_parser():
    """Create the command line parser."""
    parser = argparse.ArgumentParser(description='check bird iBGP sessions')

    parser.add_argument('--proto', '-p', help='IP protocol version to check', default='4', choices=['4', '6'])
    parser.add_argument('--asn', '-A', help="Local AS number", required=True)
    parser.add_argument('--ibgp', '-i', help="Check iBGP sessions", action='store_true')
    parser.add_argument('--ibgp_warn', '--ibgp_w', help="Warning interval for down iBGP sessions", default="1:1", metavar="RANGE")
    parser.add_argument('--ibgp_crit', '--ibgp_c', help="Critical interval for down iBGP sessions", default="2:", metavar="RANGE")
    parser.add_argument('--ebgp', '-e', help="Check eBGP sessions", action='store_true')
    parser.add_argument('--ebgp_warn', '--ebgp_w', help="Warning interval for down eBGP sessions", default="1:1", metavar="RANGE")
    parser.add_argument('--ebgp_crit', '--ebgp_c', help="Critical interval for down eBGP sessions", default="2:", metavar="RANGE")
    parser.add_argument('--disabled_ok', help="Treat sessions disabled in bird as OK.", action='store_true')
    parser.add_argument('--sessions_down_ok', metavar="LIST", help="List of sessions which are OK to be down. Provide a space separated list.")
    parser.add_argument('--sessions_down_ok_file', metavar="FILENAME", help="List of sessions which are OK to be down. Provide one session per line.")
    parser.add_argument('--ignore_missing_file', help="Ignore a possible non-existent file given as --sessions_down_ok_file", action='store_true')
    parser.add_argument('--session', help="Only check for session with given name.")

    for kind in ('imported', 'exported', 'preferred'):
        for level, label in (('warn', 'Warning'), ('crit', 'Critical')):
            parser.add_argument('--routes_%s_%s' % (kind, level),
                                help="%s interval for %s routes" % (label, kind),
                                metavar="RANGE")

    return parser


def _query_bird(proto, session):
    """Run birdc (via sudo) and return its output as a list of text lines.

    Queries only *session* if given, all protocols otherwise. Exits with
    code 3 if the command cannot be started and with code 1 if it fails
    without producing any output.
    """
    if session:
        # Check for one specific session only
        query = "show protocol all %s" % session
    else:
        # Check for all sessions and filter later
        query = "show protocols all"

    cmd = ["/usr/bin/sudo", cmds[proto], query]

    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # communicate() reads everything and waits for the child to exit.
        output, _ = proc.communicate()

    # This should not have happened.
    except Exception as e:
        print("Unknown error while running %s: %s" % (" ".join(cmd), str(e)))
        sys.exit(3)

    # A failed command without any output (e.g. sudo refused to run it) must
    # not be mistaken for "no sessions configured". A non-zero exit *with*
    # output is still parsed, so error replies are handled further down.
    if proc.returncode != 0 and not output.strip():
        print("Failed to run %s: exit code %d" % (" ".join(cmd), proc.returncode))
        sys.exit(1)

    return output.decode('utf-8', errors='replace').split('\n')


def _parse_bgp_sessions(lines):
    """Parse bird "show protocol(s) all" output into {protocol name: {field: value}}.

    Only BGP protocol blocks are collected; a preamble or empty line ends
    the current block.
    """
    sessions = {}
    current = None

    for raw_line in lines:
        line = raw_line.strip()

        # Preamble or empty string
        if ignore_re.search(line):
            current = None
            continue

        # Start of a new protocol
        header = proto_re.search(line)
        if header:
            current = sessions[header.group(1)] = {}
            continue

        # Ignore any non-BGP protocols, empty lines, etc.
        if current is None:
            continue

        # Parse and store any interesting lines / fields
        for name, config in fields.items():
            match = config['re'].search(line)
            if not match:
                continue

            # Store entries separately?
            for entry, group in config.get('mangle_dict', {}).items():
                current[entry] = match.group(group)

            if config.get('list', False):
                # Store as list
                current[name] = config['split'](match.group(1))
            else:
                # Store as string
                current[name] = " ".join(match.group(group) for group in config['groups'])

    return sessions


def _check_sessions(bgp_sessions, opts, sessions_down_ok):
    """Evaluate all parsed sessions against the given options.

    Prints ERROR / route limit messages as they are found.

    :returns: tuple (ret_code, up, down) with the resulting exit code and
        the descriptions of the sessions considered up and down.
    """
    up = []
    down = []
    ret_code = 0

    down_by_proto = {'ibgp': [], 'ebgp': []}
    proto_str = {'ibgp': 'iBGP', 'ebgp': 'eBGP'}
    sessions_up = {}

    for protoname in sorted(bgp_sessions):
        config = bgp_sessions[protoname]
        session_args = config.get('Session', [])
        neighbor_as = config['Neighbor AS']

        is_internal = 'internal' in session_args
        is_external = 'external' in session_args
        same_as = neighbor_as == opts.asn

        # Check if user gave us a remote ASN as local AS
        if is_external and same_as:
            print("ERROR: Session %s is eBGP but has our ASN! The given local ASN seems wrong!" % protoname)
            ret_code = 3

        if is_internal and not same_as:
            print("ERROR: Session %s is iBGP but does not have our ASN! The given local ASN seems wrong!" % protoname)
            ret_code = 3

        # Determine session type
        session_type = "ebgp" if (is_external or not same_as) else "ibgp"
        remote_as = "I" if session_type == "ibgp" else neighbor_as
        session_desc = "%s/%s" % (protoname, remote_as)

        # Skip iBGP/eBGP sessions when not asked to check them, but check for specific session, if given
        unwanted_ibgp = not opts.ibgp and (is_internal or same_as)
        unwanted_ebgp = not opts.ebgp and (is_external or not same_as)
        if unwanted_ibgp or unwanted_ebgp:
            if not opts.session:
                continue

            expected = "iBGP" if opts.ibgp else "eBGP"
            print("ERROR: Session %s is %s but %s was expected!" % (opts.session, proto_str[session_type], expected))
            ret_code = 2

        bgp_state = config['BGP state']
        if bgp_state == 'Established':
            up.append(session_desc)
            sessions_up[session_desc] = config['Routes']

        # Session disabled and we don't care
        elif bgp_state == 'Down' and opts.disabled_ok:
            up.append(session_desc + " (Disabled)")

        # Session down but in session_down_ok* list
        elif protoname in sessions_down_ok:
            up.append(session_desc + " (Down/OK)")

        # Something's broken
        else:
            last_error = 'Disabled' if bgp_state == 'Down' else config.get('Last error', 'unknown')
            session_desc += " (%s)" % last_error

            down.append(session_desc)
            down_by_proto[session_type].append(session_desc)

    # Check down iBGP / eBGP sessions limits
    for proto, sessions in down_by_proto.items():
        if not sessions:
            continue

        for level in ('warn', 'crit'):
            range_spec = getattr(opts, "%s_%s" % (proto, level))
            # An explicitly emptied RANGE passes validation; nothing to check.
            if not range_spec:
                continue

            code = session_down_codes[level][0]
            if _in_range(len(sessions), range_spec) and ret_code < code:
                ret_code = code

    # Check routes for up sessions
    for session, routes in sessions_up.items():
        counts = dict(zip(('routes_imported', 'routes_exported', 'routes_preferred'),
                          routes.split(' ')))

        for r_type in route_codes:
            # Critical first; the first matching level wins for this route type.
            for level in ('crit', 'warn'):
                range_spec = getattr(opts, "%s_%s" % (r_type, level))
                # Limit not given on the command line
                if not range_spec:
                    continue

                code, code_name = session_down_codes[level]
                if _in_range(int(counts[r_type]), range_spec):
                    if ret_code < code:
                        ret_code = code
                    print("%s Routes: %s with %s route(s) is %s" % (route_codes[r_type], session, counts[r_type], code_name))
                    break

    return ret_code, up, down


def main():
    """Parse options, query bird, print the check result and return the exit code."""
    global args

    ############################################################################
    #               Argument parsing and basic input validation                #
    ############################################################################

    parser = _build_parser()
    args = parser.parse_args()

    if not args.ibgp and not args.ebgp:
        print("Error: You have to enable at least one of iBGP and eBGP checking.\n", file=sys.stderr)
        parser.print_help()
        sys.exit(3)

    if args.session and args.ibgp and args.ebgp:
        print("Error: A single session can't be iBGP and eBGP at the same time!")
        parser.print_help()
        sys.exit(3)

    # Validate limit arguments
    for item in ('ibgp', 'ebgp', 'routes_imported', 'routes_exported', 'routes_preferred'):
        for severity in ('warn', 'crit'):
            msg = validate_range_arg("%s_%s" % (item, severity))
            if msg:
                print(msg)
                sys.exit(3)

    # Are some sessions ok being down?
    sessions_down_ok = []
    if args.sessions_down_ok:
        sessions_down_ok = args.sessions_down_ok.split()

    if args.sessions_down_ok_file:
        sessions_down_ok.extend(read_sessions_from_file(args.sessions_down_ok_file, args.ignore_missing_file))

    ############################################################################
    #                    Query and parse BGP protocols from bird               #
    ############################################################################

    bgp_sessions = _parse_bgp_sessions(_query_bird(args.proto, args.session))

    ############################################################################
    #                           Check the status quo                           #
    ############################################################################

    ret_code, up, down = _check_sessions(bgp_sessions, args, sessions_down_ok)

    # Special handling for session given by name: was it found at all?
    if args.session and not bgp_sessions:
        print("ERROR: Given session %s not present in configuration!" % args.session)
        sys.exit(2)

    if down:
        print("DOWN: %s" % ", ".join(down))

    if up:
        print("OK: %s" % ", ".join(up))

    return ret_code


if __name__ == '__main__':
    sys.exit(main())