#!/usr/bin/python3
#
# check_bird_bgp
#
# Check state of BGP sessions in Bird Internet Routing Daemon
#
# Maximilian Wilhelm <max@rfc2324.org>
#  -- Thu 13 Apr 2017 12:04:13 PM CEST
#
import argparse
import io
import os
import re
import subprocess
import sys
  13. def read_sessions_from_file (file_path, missing_ok):
  14. sessions = []
  15. # If we shouldn't care, we won't care if it's not there.
  16. if not os.path.isfile (file_path) and missing_ok:
  17. return sessions
  18. try:
  19. with open (args.sessions_down_ok_file, 'r') as ido_fh:
  20. for session in ido_fh.readlines ():
  21. if not session.startswith ('#'):
  22. sessions.append (session.strip ())
  23. except IOError as err:
  24. errno, strerror = err.args
  25. print ("Failed to read sessions_down_ok from '%s': %s" % (args.sessions_down_ok_file, strerror))
  26. sys.exit (1)
  27. return sessions
  28. def validate_range_arg (arg_name):
  29. value = getattr (args, arg_name)
  30. if not value:
  31. return None
  32. # Check if a RANGE was given
  33. limits = value.split (':')
  34. if len (limits) != 2:
  35. return "Error: Invalid value for --%s, expected RANGE: %s" % (arg_name, value)
  36. # Try to validate range, on limit might be empty
  37. try:
  38. # Try to parse range values to integers if present
  39. a = None
  40. b = None
  41. if (limits[0] != ''):
  42. a = int (limits[0])
  43. if (limits[1] != ''):
  44. b = int (limits[1])
  45. # Validate range if both values were given
  46. if (a != None and b != None and a > b):
  47. return "Error: Invalid value for --%s, invalid RANGE: %s" % (arg_name, value)
  48. except ValueError:
  49. return "Error: Expected numeric values in RANGE for --%s: %s" % (arg_name, value)
  50. ################################################################################
  51. # Argument parsing and basic input validation #
  52. ################################################################################
  53. parser = argparse.ArgumentParser (description = 'check bird iBGP sessions')
  54. parser.add_argument ('--proto', '-p', help = 'IP protocol version to check', default = '4', choices = ['4', '6'])
  55. parser.add_argument ('--asn', '-A', help = "Local AS number", required = True)
  56. parser.add_argument ('--ibgp', '-i', help = "Check iBGP sessions", action = 'store_true')
  57. parser.add_argument ('--ibgp_warn', '--ibgp_w', help = "Warning interval for down iBGP sessions", default = "1:1", metavar = "RANGE")
  58. parser.add_argument ('--ibgp_crit', '--ibgp_c', help = "Critical interval for down iBGP sessions", default = "2:", metavar = "RANGE")
  59. parser.add_argument ('--ebgp', '-e', help = "Check eBGP sessions", action = 'store_true')
  60. parser.add_argument ('--ebgp_warn', '--ebgp_w', help = "Warning interval for down eBGP sessions", default = "1:1", metavar = "RANGE")
  61. parser.add_argument ('--ebgp_crit', '--ebgp_c', help = "Critical interval for down eBGP sessions", default = "2:", metavar = "RANGE")
  62. parser.add_argument ('--disabled_ok', help = "Treat sessions disabled in bird as OK.", action = 'store_true')
  63. parser.add_argument ('--sessions_down_ok', metavar = "LIST", help = "List of sessions which are OK to be down. Provide a space separated list.")
  64. parser.add_argument ('--sessions_down_ok_file', metavar = "FILENAME", help = "List of sessions which are OK to be down. Provide one interfaces per line.")
  65. parser.add_argument ('--ignore_missing_file', help = "Ignore a possible non-existent file given as --interfaces_down_ok_file", action = 'store_true')
  66. parser.add_argument ('--session', help = "Only check for session with given name.")
  67. parser.add_argument ('--routes_imported_warn', help = "Warning interval for imported routes", metavar = "RANGE")
  68. parser.add_argument ('--routes_imported_crit', help = "Critical interval for imported routes", metavar = "RANGE")
  69. parser.add_argument ('--routes_exported_warn', help = "Warning interval for exported routes", metavar = "RANGE")
  70. parser.add_argument ('--routes_exported_crit', help = "Critical interval for exported routes", metavar = "RANGE")
  71. parser.add_argument ('--routes_preferred_warn', help = "Warning interval for preferred routes", metavar = "RANGE")
  72. parser.add_argument ('--routes_preferred_crit', help = "Critical interval for preferred routes", metavar = "RANGE")
  73. args = parser.parse_args ()
  74. if not args.ibgp and not args.ebgp:
  75. print ("Error: You have to enable at least one of iBGP and eBGP checking.\n", file=sys.stderr)
  76. parser.print_help ()
  77. sys.exit (3)
  78. if args.session and args.ibgp and args.ebgp:
  79. print ("Error: A single session can't be iBGP and eBGP at the same time!")
  80. parser.print_help ()
  81. sys.exit (3)
  82. # Validate limit arguments
  83. for item in ('ibgp', 'ebgp', 'routes_imported', 'routes_exported', 'routes_preferred'):
  84. for severity in ('warn', 'crit'):
  85. msg = validate_range_arg ("%s_%s" % (item, severity))
  86. if msg:
  87. print (msg)
  88. sys.exit (3)
  89. session_down_codes = {
  90. 'warn' : [ 1, 'WARNING' ],
  91. 'crit' : [ 2, 'CRITICAL'],
  92. }
  93. route_codes = {
  94. 'routes_exported' : 'Exported',
  95. 'routes_imported' : 'Imported',
  96. 'routes_preferred' : 'Preferred',
  97. }
  98. # Are some sessions ok being down?
  99. sessions_down_ok = []
  100. if args.sessions_down_ok:
  101. sessions_down_ok = args.sessions_down_ok.split ()
  102. if args.sessions_down_ok_file:
  103. sessions_down_ok.extend (read_sessions_from_file (args.sessions_down_ok_file, args.ignore_missing_file))
  104. ################################################################################
  105. # Query BGP protocols from bird #
  106. ################################################################################
  107. cmds = {
  108. '4' : '/usr/sbin/birdc',
  109. '6' : '/usr/sbin/birdc6',
  110. }
  111. # Check for one specific session only
  112. if args.session:
  113. cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocol all %s" % args.session ]
  114. # Check for all sessions and filter later
  115. else:
  116. cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocols all" ]
  117. try:
  118. protocols = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE).stdout
  119. # cmd exited with non-zero code
  120. except subprocess.CalledProcessError as c:
  121. print ("Failed to run %s: %s" % (" ".join (cmd), c.output))
  122. sys.exit (1)
  123. # This should not have happend.
  124. except Exception as e:
  125. print ("Unknown error while running %s: %s" % (" ".join (cmd), str (e)))
  126. sys.exit (3)
  127. # cr03_in_ffho_net BGP master up 2017-04-06 Established
  128. # Preference: 100
  129. # Input filter: ibgp_in
  130. # Output filter: ibgp_out
  131. # Routes: 38 imported, 3 exported, 1 preferred
  132. # OR
  133. # Routes: 1 imported, 0 filtered, 1 exported, 0 preferred
  134. # Route change stats: received rejected filtered ignored accepted
  135. # Import updates: 16779 0 0 72 16707
  136. # Import withdraws: 18012 0 --- 1355 16657
  137. # Export updates: 55104 18903 24743 --- 11458
  138. # Export withdraws: 9789 --- --- --- 11455
  139. # BGP state: Established
  140. # Neighbor address: 10.132.255.3
  141. # Neighbor AS: 65132
  142. # Neighbor ID: 10.132.255.3
  143. # Neighbor caps: refresh enhanced-refresh restart-able AS4
  144. # Session: internal multihop AS4
  145. # Source address: 10.132.255.12
  146. # Hold timer: 198/240
  147. # Keepalive timer: 13/80
  148. ################################################################################
  149. # Parse all fields from bird output into bgp_sessions dict #
  150. ################################################################################
  151. bgp_sessions = {}
  152. # Simple fields with only one values
  153. simple_fields = [ 'Preference', 'Input filter', 'Output filter', 'BGP state', 'Neighbor address', 'Neighbor AS',
  154. 'Neighbor ID', 'Source address', 'Hold timer', 'Keepalive timer', 'Last error' ]
  155. # More "complex" fields
  156. fields = {
  157. 'Routes' : {
  158. 're' : re.compile (r'Routes:\s+(\d+) imported, ((\d+) filtered, )?(\d+) exported, (\d+) preferred'),
  159. 'groups' : [ 1, 4, 5 ],
  160. 'mangle_dict' : {
  161. 'Routes imported' : 1,
  162. 'Routes exported' : 4,
  163. 'Routes preferred' : 5,
  164. }
  165. },
  166. 'Neighbor caps' : {
  167. 're' : re.compile (r'Neighbor caps:\s+(.+)$'),
  168. 'groups' : [ 1 ],
  169. 'list' : True,
  170. 'split' : lambda x: x.split (),
  171. },
  172. 'Session' : {
  173. 're' : re.compile (r'Session:\s+(.+)$'),
  174. 'groups' : [ 1 ],
  175. 'list' : True,
  176. 'split' : lambda x: x.split (),
  177. },
  178. }
  179. # Generate entries for simple fields
  180. for field in simple_fields:
  181. fields[field] = {
  182. 're' : re.compile (r'^\s*%s:\s+(.+)$' % field),
  183. 'groups' : [ 1 ],
  184. }
  185. proto_re = re.compile (r'^([0-9a-zA-Z_.-]+)\s+BGP\s+') # XXX
  186. ignore_re = re.compile (r'^(BIRD [0-9.]+ ready.|name\s+proto\s+table\s+.*)?$')
  187. # Parse session list
  188. protocol = None
  189. proto_dict = None
  190. for line in protocols.readlines ():
  191. line = line.strip ()
  192. # Python3 glue
  193. if sys.version_info >= (3, 0):
  194. line = str (line, encoding='utf-8')
  195. # Preamble or empty string
  196. if ignore_re.search (line):
  197. protocol = None
  198. proto_dict = None
  199. continue
  200. # Start of a new protocol
  201. match = proto_re.search (line)
  202. if match:
  203. protocol = match.group (1)
  204. bgp_sessions[protocol] = {}
  205. proto_dict = bgp_sessions[protocol]
  206. continue
  207. # Ignore any non-BGP protocols, empty lines, etc.
  208. if protocol == None:
  209. continue
  210. # Parse and store any interesting lines / fields
  211. for field, config in fields.items ():
  212. match = config['re'].search (line)
  213. if not match:
  214. continue
  215. # Get values from match
  216. values = []
  217. for group in config['groups']:
  218. values.append (match.group (group))
  219. # Store entries separately?
  220. mangle_dict = config.get ('mangle_dict', None)
  221. if mangle_dict:
  222. for entry, group in mangle_dict.items ():
  223. proto_dict[entry] = match.group (group)
  224. # Store as list?
  225. if config.get ('list', False) == True:
  226. proto_dict[field] = config['split'] (match.group (1))
  227. # Store as string
  228. else:
  229. proto_dict[field] = " ".join (values)
  230. ################################################################################
  231. # Check the status quo #
  232. ################################################################################
  233. up = []
  234. down = []
  235. ret_code = 0
  236. down_by_proto = {
  237. 'ibgp' : [],
  238. 'ebgp' : []
  239. }
  240. proto_str = {
  241. 'ibgp' : 'iBGP',
  242. 'ebgp' : 'eBGP'
  243. }
  244. sessions_up = {}
  245. for protoname, config in sorted (bgp_sessions.items ()):
  246. session_args = config.get ('Session', [])
  247. # Check if user gave us a remote ASN as local AS
  248. if ('external' in session_args) and (config['Neighbor AS'] == args.asn):
  249. print ("ERROR: Session %s is eBGP but has our ASN! The given local ASN seems wrong!" % protoname)
  250. ret_code = 3
  251. if ('internal' in session_args) and (config['Neighbor AS'] != args.asn):
  252. print ("ERROR: Session %s is iBGP but does not have our ASN! The given local ASN seems wrong!" % protoname)
  253. ret_code = 3
  254. # Determine session type
  255. session_type = "ibgp"
  256. if ('external' in session_args) or (config['Neighbor AS'] != args.asn):
  257. session_type = "ebgp"
  258. remote_as = "I" if session_type == "ibgp" else config.get ('Neighbor AS')
  259. session_desc = "%s/%s" % (protoname, remote_as)
  260. # Skip iBGP/eBGP sessions when not asked to check them, but check for specific session, if given
  261. if (args.ibgp != True and (('internal' in session_args) or (config['Neighbor AS'] == args.asn))) or \
  262. (args.ebgp != True and (('external' in session_args) or (config['Neighbor AS'] != args.asn))):
  263. if not args.session:
  264. continue
  265. expected = "iBGP" if args.ibgp else "eBGP"
  266. print ("ERROR: Session %s is %s but %s was expected!" % (args.session, proto_str[session_type], expected))
  267. ret_code = 2
  268. bgp_state = config['BGP state']
  269. if bgp_state == 'Established':
  270. up.append (session_desc)
  271. sessions_up[session_desc] = config['Routes']
  272. # Session disable and we don't care
  273. elif bgp_state == 'Down' and args.disabled_ok:
  274. up.append (session_desc + " (Disabled)")
  275. # Session down but in session_down_ok* list
  276. elif protoname in sessions_down_ok:
  277. up.append (session_desc + " (Down/OK)")
  278. # Something's broken
  279. else:
  280. last_error = 'Disabled' if bgp_state == 'Down' else config.get ('Last error', 'unknown')
  281. session_desc += " (%s)" % last_error
  282. down.append (session_desc)
  283. down_by_proto[session_type].append (session_desc)
  284. # Check down iBGP / eBGP sessions limits
  285. for proto, sessions in down_by_proto.items ():
  286. down_sessions = len (sessions)
  287. if down_sessions == 0:
  288. continue
  289. for level in [ 'warn', 'crit' ]:
  290. limits = getattr (args, "%s_%s" % (proto, level)).split (":")
  291. code, code_name = session_down_codes[level]
  292. # Check if number of down sessions is within warning or critical limits
  293. if (limits[0] == '' or down_sessions >= int (limits[0])) and \
  294. (limits[1] == '' or down_sessions <= int (limits[1])):
  295. if ret_code < code:
  296. ret_code = code
  297. # Check routes for up sessions
  298. for session, routes in sessions_up.items ():
  299. session_info = {}
  300. session_info['routes_imported'], session_info['routes_exported'], session_info['routes_preferred'] = routes.split (' ')
  301. for r_type in route_codes.keys():
  302. for level in [ 'crit', 'warn' ]:
  303. try:
  304. limits = getattr (args, "%s_%s" % (r_type, level)).split (":")
  305. except:
  306. pass
  307. else:
  308. code, code_name = session_down_codes[level]
  309. if (limits[0] == '' or int(session_info[r_type]) >= int (limits[0])) and \
  310. (limits[1] == '' or int(session_info[r_type]) <= int (limits[1])):
  311. if ret_code < code:
  312. ret_code = code
  313. print("%s Routes: %s with %s route(s) is %s" % (route_codes[r_type],session,session_info[r_type],code_name))
  314. break
  315. # Special handling for session given by name
  316. if args.session:
  317. # Check is given session name was found
  318. if len (bgp_sessions) == 0:
  319. print ("ERROR: Given session %s not present in configuration!" % args.session)
  320. sys.exit (2)
  321. if len (down) > 0:
  322. print ("DOWN: %s" % ", ".join (down))
  323. if len (up) > 0:
  324. print ("OK: %s" % ", ".join (up))
  325. sys.exit (ret_code)