check_bird_bgp 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. #!/usr/bin/python
  2. #
  3. # Check state of BGP sessions in Bird Internet Routing Daemon
  4. #
  5. # Maximilian Wilhelm <max@rfc2324.org>
  6. # -- Thu 13 Apr 2017 12:04:13 PM CEST
  7. #
import argparse
import io
import os
import re
import subprocess
import sys
  13. def read_sessions_from_file (file_path, missing_ok):
  14. sessions = []
  15. # If we shouldn't care, we won't care if it's not there.
  16. if not os.path.isfile (file_path) and missing_ok:
  17. return sessions
  18. try:
  19. with open (args.sessions_down_ok_file, 'r') as ido_fh:
  20. for session in ido_fh.readlines ():
  21. if not session.startswith ('#'):
  22. sessions.append (session.strip ())
  23. except IOError as (errno, strerror):
  24. print "Failed to read sessions_down_ok from '%s': %s" % (args.sessions_down_ok_file, strerror)
  25. sys.exit (1)
  26. return sessions
  27. parser = argparse.ArgumentParser (description = 'check bird iBGP sessions')
  28. parser.add_argument ('--proto', '-p', help = 'IP protocol version to check', default = '4', choices = ['4', '6'])
  29. parser.add_argument ('--asn', '-A', help = "Local AS number", required = True)
  30. parser.add_argument ('--ibgp', '-i', help = "Check iBGP sessions", action = 'store_true')
  31. parser.add_argument ('--ibgp_w', help = "Warning interval for down iBGP sessions", default = "1:1", metavar = "RANGE")
  32. parser.add_argument ('--ibgp_c', help = "Critical interval for down iBGP sessions", default = "2:", metavar = "RANGE")
  33. parser.add_argument ('--ebgp', '-e', help = "Check eBGP sessions", action = 'store_true')
  34. parser.add_argument ('--ebgp_w', help = "Warning interval for down eBGP sessions", default = "1:1", metavar = "RANGE")
  35. parser.add_argument ('--ebgp_c', help = "Critical interval for down eBGP sessions", default = "2:", metavar = "RANGE")
  36. parser.add_argument ('--disabled_ok', help = "Treat sessions disabled in bird as OK.", action = 'store_true')
  37. parser.add_argument ('--sessions_down_ok', metavar = "LIST", help = "List of sessions which are OK to be down. Provide a space separated list.")
  38. parser.add_argument ('--sessions_down_ok_file', metavar = "FILENAME", help = "List of sessions which are OK to be down. Provide one interfaces per line.")
  39. parser.add_argument ('--ignore_missing_file', help = "Ignore a possible non-existent file given as --interfaces_down_ok_file", action = 'store_true')
  40. args = parser.parse_args ()
  41. if not args.ibgp and not args.ebgp:
  42. print >> sys.stderr, "Error: You have to enable at least one of iBGP and eBGP checking.\n"
  43. parser.print_help ()
  44. sys.exit (3)
  45. session_down_codes = {
  46. 'w' : 1,
  47. 'c' : 2,
  48. }
  49. # Are some sessions ok being down?
  50. sessions_down_ok = []
  51. if args.sessions_down_ok:
  52. sessions_down_ok = args.sessions_down_ok.split ()
  53. if args.sessions_down_ok_file:
  54. sessions_down_ok.extend (read_sessions_from_file (args.sessions_down_ok_file, args.ignore_missing_file))
  55. ################################################################################
  56. # Query BGP protocols from bird #
  57. ################################################################################
  58. cmds = {
  59. '4' : '/usr/sbin/birdc',
  60. '6' : '/usr/sbin/birdc6',
  61. }
  62. cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocols all" ]
  63. try:
  64. protocols = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE).stdout
  65. # cmd exited with non-zero code
  66. except subprocess.CalledProcessError as c:
  67. print "Failed to run %s: %s" % (" ".join (cmd), c.output)
  68. sys.exit (1)
  69. # This should not have happend.
  70. except Exception as e:
  71. print "Unknown error while running %s: %s" % (" ".join (cmd), str (e))
  72. sys.exit (3)
  73. # cr03_in_ffho_net BGP master up 2017-04-06 Established
  74. # Preference: 100
  75. # Input filter: ibgp_in
  76. # Output filter: ibgp_out
  77. # Routes: 38 imported, 3 exported, 1 preferred
  78. # Route change stats: received rejected filtered ignored accepted
  79. # Import updates: 16779 0 0 72 16707
  80. # Import withdraws: 18012 0 --- 1355 16657
  81. # Export updates: 55104 18903 24743 --- 11458
  82. # Export withdraws: 9789 --- --- --- 11455
  83. # BGP state: Established
  84. # Neighbor address: 10.132.255.3
  85. # Neighbor AS: 65132
  86. # Neighbor ID: 10.132.255.3
  87. # Neighbor caps: refresh enhanced-refresh restart-able AS4
  88. # Session: internal multihop AS4
  89. # Source address: 10.132.255.12
  90. # Hold timer: 198/240
  91. # Keepalive timer: 13/80
  92. ################################################################################
  93. # Parse all fields from bird output into bgp_sessions dict #
  94. ################################################################################
  95. bgp_sessions = {}
  96. # Simple fields with only one values
  97. simple_fields = [ 'Preference', 'Input filter', 'Output filter', 'BGP state', 'Neighbor address', 'Neighbor AS',
  98. 'Neighbor ID', 'Source address', 'Hold timer', 'Keepalive timer', 'Last error' ]
  99. # More "complex" fields
  100. fields = {
  101. 'Routes' : {
  102. 're' : re.compile (r'Routes:\s+(\d+) imported, (\d+) exported, (\d+) preferred'),
  103. 'groups' : [ 1, 2, 3 ],
  104. 'mangle_dict' : {
  105. 'Routes imported' : 1,
  106. 'Routes exported' : 2,
  107. 'Routes preferred' : 3,
  108. }
  109. },
  110. 'Neighbor caps' : {
  111. 're' : re.compile (r'Neighbor caps:\s+(.+)$'),
  112. 'groups' : [ 1 ],
  113. 'list' : True,
  114. 'split' : lambda x: x.split (),
  115. },
  116. 'Session' : {
  117. 're' : re.compile (r'Session:\s+(.+)$'),
  118. 'groups' : [ 1 ],
  119. 'list' : True,
  120. 'split' : lambda x: x.split (),
  121. },
  122. }
  123. # Generate entries for simple fields
  124. for field in simple_fields:
  125. fields[field] = {
  126. 're' : re.compile (r'^\s*%s:\s+(.+)$' % field),
  127. 'groups' : [ 1 ],
  128. }
  129. proto_re = re.compile (r'^([0-9a-zA-Z_.-]+)\s+BGP\s+') # XXX
  130. ignore_re = re.compile (r'^(BIRD [0-9.]+ ready.|name\s+proto\s+table\s+.*)?$')
  131. # Parse session list
  132. protocol = None
  133. proto_dict = None
  134. for line in protocols.readlines ():
  135. line = line.strip ()
  136. # Preamble or empty string
  137. if ignore_re.search (line):
  138. protocol = None
  139. proto_dict = None
  140. continue
  141. # Start of a new protocol
  142. match = proto_re.search (line)
  143. if match:
  144. protocol = match.group (1)
  145. bgp_sessions[protocol] = {}
  146. proto_dict = bgp_sessions[protocol]
  147. continue
  148. # Ignore any non-BGP protocols, empty lines, etc.
  149. if protocol == None:
  150. continue
  151. # Parse and store any interesting lines / fields
  152. for field, config in fields.items ():
  153. match = config['re'].search (line)
  154. if not match:
  155. continue
  156. # Get values from match
  157. values = []
  158. for group in config['groups']:
  159. values.append (match.group (group))
  160. # Store entries separately?
  161. mangle_dict = config.get ('mangle_dict', None)
  162. if mangle_dict:
  163. for entry, group in mangle_dict.items ():
  164. proto_dict[entry] = match.group (group)
  165. # Store as list?
  166. if config.get ('list', False) == True:
  167. proto_dict[field] = config['split'] (match.group (1))
  168. # Store as string
  169. else:
  170. proto_dict[field] = " ".join (values)
  171. ################################################################################
  172. # Check the status quo #
  173. ################################################################################
  174. up = []
  175. down = []
  176. ret_code = 0
  177. down_by_proto = {
  178. 'ibgp' : [],
  179. 'ebgp' : []
  180. }
  181. for protoname, config in sorted (bgp_sessions.items ()):
  182. # Skip iBGP/eBGP sessions when not asked to check them
  183. session_args = config.get ('Session', [])
  184. if (args.ibgp != True and (('internal' in session_args) or (config['Neighbor AS'] == args.asn))) or \
  185. (args.ebgp != True and (('external' in session_args) or (config['Neighbor AS'] != args.asn))):
  186. continue
  187. session_type = "ibgp"
  188. if ('external' in session_args) or (config['Neighbor AS'] != args.asn):
  189. session_type = "ebgp"
  190. remote_as = "I" if session_type == "ibgp" else config.get ('Neighbor AS')
  191. session_desc = "%s/%s" % (protoname, remote_as)
  192. bgp_state = config['BGP state']
  193. if bgp_state == 'Established':
  194. up.append (session_desc)
  195. # Session disable and we don't care
  196. elif bgp_state == 'Down' and args.disabled_ok:
  197. up.append (session_desc + " (Disabled)")
  198. # Session down but in session_down_ok* list
  199. elif protoname in sessions_down_ok:
  200. up.append (session_desc + " (Down/OK)")
  201. # Something's broken
  202. else:
  203. last_error = 'Disabled' if bgp_state == 'Down' else config.get ('Last error', 'unkown')
  204. session_desc += " (%s)" % last_error
  205. down.append (session_desc)
  206. down_by_proto[session_type].append (session_desc)
  207. for proto, sessions in down_by_proto.items ():
  208. down_sessions = len (sessions)
  209. if down_sessions == 0:
  210. continue
  211. for level in [ 'w', 'c' ]:
  212. limits = getattr (args, "%s_%s" % (proto, level)).split (":")
  213. code = session_down_codes[level]
  214. # Check if
  215. if (limits[0] == '' or down_sessions >= int (limits[0])) and \
  216. (limits[1] == '' or down_sessions <= int (limits[1])):
  217. if ret_code < code:
  218. ret_code = code
  219. if len (down) > 0:
  220. print "DOWN: %s" % ", ".join (down)
  221. if len (up) > 0:
  222. print "OK: %s" % ", ".join (up)
  223. sys.exit (ret_code)