check_bird_bgp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. #!/usr/bin/python3
  2. #
  3. # Check state of BGP sessions in Bird Internet Routing Daemon
  4. #
  5. # Maximilian Wilhelm <max@rfc2324.org>
  6. # -- Thu 13 Apr 2017 12:04:13 PM CEST
  7. #
  8. import argparse
  9. import os
  10. import re
  11. import subprocess
  12. import sys
  13. def read_sessions_from_file (file_path, missing_ok):
  14. sessions = []
  15. # If we shouldn't care, we won't care if it's not there.
  16. if not os.path.isfile (file_path) and missing_ok:
  17. return sessions
  18. try:
  19. with open (args.sessions_down_ok_file, 'r') as ido_fh:
  20. for session in ido_fh.readlines ():
  21. if not session.startswith ('#'):
  22. sessions.append (session.strip ())
  23. except IOError as err:
  24. errno, strerror = err.args
  25. print ("Failed to read sessions_down_ok from '%s': %s" % (args.sessions_down_ok_file, strerror))
  26. sys.exit (1)
  27. return sessions
  28. def validate_range_arg (arg_name):
  29. value = getattr (args, arg_name)
  30. if not value:
  31. return None
  32. # Check if a RANGE was given
  33. limits = value.split (':')
  34. if len (limits) != 2:
  35. return "Error: Invalid value for --%s, expected RANGE: %s" % (arg_name, value)
  36. # Try to validate range, on limit might be empty
  37. try:
  38. # Try to parse range values to integers if present
  39. a = None
  40. b = None
  41. if (limits[0] != ''):
  42. a = int (limits[0])
  43. if (limits[1] != ''):
  44. b = int (limits[1])
  45. # Validate range if both values were given
  46. if (a != None and b != None and a > b):
  47. return "Error: Invalid value for --%s, invalid RANGE: %s" % (arg_name, value)
  48. except ValueError:
  49. return "Error: Expected numeric values in RANGE for --%s: %s" % (arg_name, value)
  50. ################################################################################
  51. # Argument parsing and basic input validation #
  52. ################################################################################
  53. parser = argparse.ArgumentParser (description = 'check bird iBGP sessions')
  54. parser.add_argument ('--proto', '-p', help = 'IP protocol version to check', default = '4', choices = ['4', '6'])
  55. parser.add_argument ('--asn', '-A', help = "Local AS number", required = True)
  56. parser.add_argument ('--ibgp', '-i', help = "Check iBGP sessions", action = 'store_true')
  57. parser.add_argument ('--ibgp_warn', '--ibgp_w', help = "Warning interval for down iBGP sessions", default = "1:1", metavar = "RANGE")
  58. parser.add_argument ('--ibgp_crit', '--ibgp_c', help = "Critical interval for down iBGP sessions", default = "2:", metavar = "RANGE")
  59. parser.add_argument ('--ebgp', '-e', help = "Check eBGP sessions", action = 'store_true')
  60. parser.add_argument ('--ebgp_warn', '--ebgp_w', help = "Warning interval for down eBGP sessions", default = "1:1", metavar = "RANGE")
  61. parser.add_argument ('--ebgp_crit', '--ebgp_c', help = "Critical interval for down eBGP sessions", default = "2:", metavar = "RANGE")
  62. parser.add_argument ('--disabled_ok', help = "Treat sessions disabled in bird as OK.", action = 'store_true')
  63. parser.add_argument ('--sessions_down_ok', metavar = "LIST", help = "List of sessions which are OK to be down. Provide a space separated list.")
  64. parser.add_argument ('--sessions_down_ok_file', metavar = "FILENAME", help = "List of sessions which are OK to be down. Provide one interfaces per line.")
  65. parser.add_argument ('--ignore_missing_file', help = "Ignore a possible non-existent file given as --interfaces_down_ok_file", action = 'store_true')
  66. parser.add_argument ('--session', help = "Only check for session with given name.")
  67. parser.add_argument ('--routes_imported_warn', help = "Warning interval for imported routes", metavar = "RANGE")
  68. parser.add_argument ('--routes_imported_crit', help = "Critical interval for imported routes", metavar = "RANGE")
  69. parser.add_argument ('--routes_exported_warn', help = "Warning interval for exported routes", metavar = "RANGE")
  70. parser.add_argument ('--routes_exported_crit', help = "Critical interval for exported routes", metavar = "RANGE")
  71. parser.add_argument ('--routes_preferred_warn', help = "Warning interval for preferred routes", metavar = "RANGE")
  72. parser.add_argument ('--routes_preferred_crit', help = "Critical interval for preferred routes", metavar = "RANGE")
  73. args = parser.parse_args ()
  74. if not args.ibgp and not args.ebgp:
  75. print ("Error: You have to enable at least one of iBGP and eBGP checking.\n", file=sys.stderr)
  76. parser.print_help ()
  77. sys.exit (3)
  78. if args.session and args.ibgp and args.ebgp:
  79. print ("Error: A single session can't be iBGP and eBGP at the same time!")
  80. parser.print_help ()
  81. sys.exit (3)
  82. # Validate limit arguments
  83. for item in ('ibgp', 'ebgp', 'routes_imported', 'routes_exported', 'routes_preferred'):
  84. for severity in ('warn', 'crit'):
  85. msg = validate_range_arg ("%s_%s" % (item, severity))
  86. if msg:
  87. print (msg)
  88. sys.exit (3)
  89. session_down_codes = {
  90. 'warn' : [ 1, 'WARNING' ],
  91. 'crit' : [ 2, 'CRITICAL'],
  92. }
  93. route_codes = {
  94. 'routes_exported' : 'Exported',
  95. 'routes_imported' : 'Imported',
  96. 'routes_preferred' : 'Preferred',
  97. }
  98. # Are some sessions ok being down?
  99. sessions_down_ok = []
  100. if args.sessions_down_ok:
  101. sessions_down_ok = args.sessions_down_ok.split ()
  102. if args.sessions_down_ok_file:
  103. sessions_down_ok.extend (read_sessions_from_file (args.sessions_down_ok_file, args.ignore_missing_file))
  104. ################################################################################
  105. # Query BGP protocols from bird #
  106. ################################################################################
  107. cmds = {
  108. '4' : '/usr/sbin/birdc',
  109. '6' : '/usr/sbin/birdc6',
  110. }
  111. # Check for one specific session only
  112. if args.session:
  113. cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocol all %s" % args.session ]
  114. # Check for all sessions and filter later
  115. else:
  116. cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocols all" ]
  117. try:
  118. protocols = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE).stdout
  119. # cmd exited with non-zero code
  120. except subprocess.CalledProcessError as c:
  121. print ("Failed to run %s: %s" % (" ".join (cmd), c.output))
  122. sys.exit (1)
  123. # This should not have happend.
  124. except Exception as e:
  125. print ("Unknown error while running %s: %s" % (" ".join (cmd), str (e)))
  126. sys.exit (3)
  127. # cr03_in_ffho_net BGP master up 2017-04-06 Established
  128. # Preference: 100
  129. # Input filter: ibgp_in
  130. # Output filter: ibgp_out
  131. # Routes: 38 imported, 3 exported, 1 preferred
  132. # Route change stats: received rejected filtered ignored accepted
  133. # Import updates: 16779 0 0 72 16707
  134. # Import withdraws: 18012 0 --- 1355 16657
  135. # Export updates: 55104 18903 24743 --- 11458
  136. # Export withdraws: 9789 --- --- --- 11455
  137. # BGP state: Established
  138. # Neighbor address: 10.132.255.3
  139. # Neighbor AS: 65132
  140. # Neighbor ID: 10.132.255.3
  141. # Neighbor caps: refresh enhanced-refresh restart-able AS4
  142. # Session: internal multihop AS4
  143. # Source address: 10.132.255.12
  144. # Hold timer: 198/240
  145. # Keepalive timer: 13/80
  146. ################################################################################
  147. # Parse all fields from bird output into bgp_sessions dict #
  148. ################################################################################
  149. bgp_sessions = {}
  150. # Simple fields with only one values
  151. simple_fields = [ 'Preference', 'Input filter', 'Output filter', 'BGP state', 'Neighbor address', 'Neighbor AS',
  152. 'Neighbor ID', 'Source address', 'Hold timer', 'Keepalive timer', 'Last error' ]
  153. # More "complex" fields
  154. fields = {
  155. 'Routes' : {
  156. 're' : re.compile (r'Routes:\s+(\d+) imported, (\d+) exported, (\d+) preferred'),
  157. 'groups' : [ 1, 2, 3 ],
  158. 'mangle_dict' : {
  159. 'Routes imported' : 1,
  160. 'Routes exported' : 2,
  161. 'Routes preferred' : 3,
  162. }
  163. },
  164. 'Neighbor caps' : {
  165. 're' : re.compile (r'Neighbor caps:\s+(.+)$'),
  166. 'groups' : [ 1 ],
  167. 'list' : True,
  168. 'split' : lambda x: x.split (),
  169. },
  170. 'Session' : {
  171. 're' : re.compile (r'Session:\s+(.+)$'),
  172. 'groups' : [ 1 ],
  173. 'list' : True,
  174. 'split' : lambda x: x.split (),
  175. },
  176. }
  177. # Generate entries for simple fields
  178. for field in simple_fields:
  179. fields[field] = {
  180. 're' : re.compile (r'^\s*%s:\s+(.+)$' % field),
  181. 'groups' : [ 1 ],
  182. }
  183. proto_re = re.compile (r'^([0-9a-zA-Z_.-]+)\s+BGP\s+') # XXX
  184. ignore_re = re.compile (r'^(BIRD [0-9.]+ ready.|name\s+proto\s+table\s+.*)?$')
  185. # Parse session list
  186. protocol = None
  187. proto_dict = None
  188. for line in protocols.readlines ():
  189. line = line.strip ()
  190. # Python3 glue
  191. if sys.version_info >= (3, 0):
  192. line = str (line, encoding='utf-8')
  193. # Preamble or empty string
  194. if ignore_re.search (line):
  195. protocol = None
  196. proto_dict = None
  197. continue
  198. # Start of a new protocol
  199. match = proto_re.search (line)
  200. if match:
  201. protocol = match.group (1)
  202. bgp_sessions[protocol] = {}
  203. proto_dict = bgp_sessions[protocol]
  204. continue
  205. # Ignore any non-BGP protocols, empty lines, etc.
  206. if protocol == None:
  207. continue
  208. # Parse and store any interesting lines / fields
  209. for field, config in fields.items ():
  210. match = config['re'].search (line)
  211. if not match:
  212. continue
  213. # Get values from match
  214. values = []
  215. for group in config['groups']:
  216. values.append (match.group (group))
  217. # Store entries separately?
  218. mangle_dict = config.get ('mangle_dict', None)
  219. if mangle_dict:
  220. for entry, group in mangle_dict.items ():
  221. proto_dict[entry] = match.group (group)
  222. # Store as list?
  223. if config.get ('list', False) == True:
  224. proto_dict[field] = config['split'] (match.group (1))
  225. # Store as string
  226. else:
  227. proto_dict[field] = " ".join (values)
  228. ################################################################################
  229. # Check the status quo #
  230. ################################################################################
  231. up = []
  232. down = []
  233. ret_code = 0
  234. down_by_proto = {
  235. 'ibgp' : [],
  236. 'ebgp' : []
  237. }
  238. proto_str = {
  239. 'ibgp' : 'iBGP',
  240. 'ebgp' : 'eBGP'
  241. }
  242. sessions_up = {}
  243. for protoname, config in sorted (bgp_sessions.items ()):
  244. session_args = config.get ('Session', [])
  245. # Check if user gave us a remote ASN as local AS
  246. if ('external' in session_args) and (config['Neighbor AS'] == args.asn):
  247. print ("ERROR: Session %s is eBGP but has our ASN! The given local ASN seems wrong!" % protoname)
  248. ret_code = 3
  249. if ('internal' in session_args) and (config['Neighbor AS'] != args.asn):
  250. print ("ERROR: Session %s is iBGP but does not have our ASN! The given local ASN seems wrong!" % protoname)
  251. ret_code = 3
  252. # Determine session type
  253. session_type = "ibgp"
  254. if ('external' in session_args) or (config['Neighbor AS'] != args.asn):
  255. session_type = "ebgp"
  256. remote_as = "I" if session_type == "ibgp" else config.get ('Neighbor AS')
  257. session_desc = "%s/%s" % (protoname, remote_as)
  258. # Skip iBGP/eBGP sessions when not asked to check them, but check for specific session, if given
  259. if (args.ibgp != True and (('internal' in session_args) or (config['Neighbor AS'] == args.asn))) or \
  260. (args.ebgp != True and (('external' in session_args) or (config['Neighbor AS'] != args.asn))):
  261. if not args.session:
  262. continue
  263. expected = "iBGP" if args.ibgp else "eBGP"
  264. print ("ERROR: Session %s is %s but %s was expected!" % (args.session, proto_str[session_type], expected))
  265. ret_code = 2
  266. bgp_state = config['BGP state']
  267. if bgp_state == 'Established':
  268. up.append (session_desc)
  269. sessions_up[session_desc] = config['Routes']
  270. # Session disable and we don't care
  271. elif bgp_state == 'Down' and args.disabled_ok:
  272. up.append (session_desc + " (Disabled)")
  273. # Session down but in session_down_ok* list
  274. elif protoname in sessions_down_ok:
  275. up.append (session_desc + " (Down/OK)")
  276. # Something's broken
  277. else:
  278. last_error = 'Disabled' if bgp_state == 'Down' else config.get ('Last error', 'unknown')
  279. session_desc += " (%s)" % last_error
  280. down.append (session_desc)
  281. down_by_proto[session_type].append (session_desc)
  282. # Check down iBGP / eBGP sessions limits
  283. for proto, sessions in down_by_proto.items ():
  284. down_sessions = len (sessions)
  285. if down_sessions == 0:
  286. continue
  287. for level in [ 'warn', 'crit' ]:
  288. limits = getattr (args, "%s_%s" % (proto, level)).split (":")
  289. code, code_name = session_down_codes[level]
  290. # Check if number of down sessions is within warning or critical limits
  291. if (limits[0] == '' or down_sessions >= int (limits[0])) and \
  292. (limits[1] == '' or down_sessions <= int (limits[1])):
  293. if ret_code < code:
  294. ret_code = code
  295. # Check routes for up sessions
  296. for session, routes in sessions_up.items ():
  297. session_info = {}
  298. session_info['routes_imported'], session_info['routes_exported'], session_info['routes_preferred'] = routes.split (' ')
  299. for r_type in route_codes.keys():
  300. for level in [ 'crit', 'warn' ]:
  301. try:
  302. limits = getattr (args, "%s_%s" % (r_type, level)).split (":")
  303. except:
  304. pass
  305. else:
  306. code, code_name = session_down_codes[level]
  307. if (limits[0] == '' or int(session_info[r_type]) >= int (limits[0])) and \
  308. (limits[1] == '' or int(session_info[r_type]) <= int (limits[1])):
  309. if ret_code < code:
  310. ret_code = code
  311. print("%s Routes: %s with %s route(s) is %s" % (route_codes[r_type],session,session_info[r_type],code_name))
  312. break
  313. # Special handling for session given by name
  314. if args.session:
  315. # Check is given session name was found
  316. if len (bgp_sessions) == 0:
  317. print ("ERROR: Given session %s not present in configuration!" % args.session)
  318. sys.exit (2)
  319. if len (down) > 0:
  320. print ("DOWN: %s" % ", ".join (down))
  321. if len (up) > 0:
  322. print ("OK: %s" % ", ".join (up))
  323. sys.exit (ret_code)