#!/usr/bin/python
#
# check_bird_bgp
#
# Check state of BGP sessions in Bird Internet Routing Daemon
#
# Maximilian Wilhelm <max@rfc2324.org>
#  --  Thu 13 Apr 2017 12:04:13 PM CEST
#
import argparse
import io
import re
import subprocess
import sys
  12. parser = argparse.ArgumentParser (description = 'check bird iBGP sessions')
  13. parser.add_argument ('--proto', '-p', help = 'IP protocol version to check', default = '4', choices = ['4', '6'])
  14. parser.add_argument ('--asn', '-A', help = "Local AS number", required = True)
  15. parser.add_argument ('--ibgp', '-i', help = "Check iBGP sessions", action = 'store_true')
  16. parser.add_argument ('--ibgp_w', help = "Warning interval for down iBGP sessions", default = "1:1", metavar = "RANGE")
  17. parser.add_argument ('--ibgp_c', help = "Critical interval for down iBGP sessions", default = "2:", metavar = "RANGE")
  18. parser.add_argument ('--ebgp', '-e', help = "Check eBGP sessions", action = 'store_true')
  19. parser.add_argument ('--ebgp_w', help = "Warning interval for down eBGP sessions", default = "1:1", metavar = "RANGE")
  20. parser.add_argument ('--ebgp_c', help = "Critical interval for down eBGP sessions", default = "2:", metavar = "RANGE")
  21. parser.add_argument ('--disabled_ok', help = "Treat sessions disabled in bird as OK.", action = 'store_true')
  22. args = parser.parse_args ()
  23. if not args.ibgp and not args.ebgp:
  24. print >> sys.stderr, "Error: You have to enable at least one of iBGP and eBGP checking.\n"
  25. parser.print_help ()
  26. sys.exit (3)
  27. session_down_codes = {
  28. 'w' : 1,
  29. 'c' : 2,
  30. }
  31. ################################################################################
  32. # Query BGP protocols from bird #
  33. ################################################################################
  34. cmds = {
  35. '4' : '/usr/sbin/birdc',
  36. '6' : '/usr/sbin/birdc6',
  37. }
  38. cmd = [ "/usr/bin/sudo", cmds[args.proto], "show protocols all" ]
  39. try:
  40. protocols = subprocess.Popen (cmd, bufsize = 4194304, stdout = subprocess.PIPE).stdout
  41. # cmd exited with non-zero code
  42. except subprocess.CalledProcessError as c:
  43. print "Failed to run %s: %s" % (" ".join (cmd), c.output)
  44. sys.exit (1)
  45. # This should not have happend.
  46. except Exception as e:
  47. print "Unknown error while running %s: %s" % (" ".join (cmd), str (e))
  48. sys.exit (3)
# Example of a single BGP protocol as printed by "show protocols all":
#
# cr03_in_ffho_net BGP      master   up     2017-04-06  Established
#   Preference:     100
#   Input filter:   ibgp_in
#   Output filter:  ibgp_out
#   Routes:         38 imported, 3 exported, 1 preferred
#   Route change stats:     received   rejected   filtered    ignored   accepted
#     Import updates:          16779          0          0         72      16707
#     Import withdraws:        18012          0        ---       1355      16657
#     Export updates:          55104      18903      24743        ---      11458
#     Export withdraws:         9789        ---        ---        ---      11455
#   BGP state:          Established
#     Neighbor address: 10.132.255.3
#     Neighbor AS:      65132
#     Neighbor ID:      10.132.255.3
#     Neighbor caps:    refresh enhanced-refresh restart-able AS4
#     Session:          internal multihop AS4
#     Source address:   10.132.255.12
#     Hold timer:       198/240
#     Keepalive timer:  13/80
  68. ################################################################################
  69. # Parse all fields from bird output into bgp_sessions dict #
  70. ################################################################################
  71. bgp_sessions = {}
  72. # Simple fields with only one values
  73. simple_fields = [ 'Preference', 'Input filter', 'Output filter', 'BGP state', 'Neighbor address', 'Neighbor AS',
  74. 'Neighbor ID', 'Source address', 'Hold timer', 'Keepalive timer', 'Last error' ]
  75. # More "complex" fields
  76. fields = {
  77. 'Routes' : {
  78. 're' : re.compile (r'Routes:\s+(\d+) imported, (\d+) exported, (\d+) preferred'),
  79. 'groups' : [ 1, 2, 3 ],
  80. 'mangle_dict' : {
  81. 'Routes imported' : 1,
  82. 'Routes exported' : 2,
  83. 'Routes preferred' : 3,
  84. }
  85. },
  86. 'Neighbor caps' : {
  87. 're' : re.compile (r'Neighbor caps:\s+(.+)$'),
  88. 'groups' : [ 1 ],
  89. 'list' : True,
  90. 'split' : lambda x: x.split (),
  91. },
  92. 'Session' : {
  93. 're' : re.compile (r'Session:\s+(.+)$'),
  94. 'groups' : [ 1 ],
  95. 'list' : True,
  96. 'split' : lambda x: x.split (),
  97. },
  98. }
  99. # Generate entries for simple fields
  100. for field in simple_fields:
  101. fields[field] = {
  102. 're' : re.compile (r'^\s*%s:\s+(.+)$' % field),
  103. 'groups' : [ 1 ],
  104. }
  105. proto_re = re.compile (r'^([0-9a-zA-Z_.-]+)\s+BGP\s+') # XXX
  106. ignore_re = re.compile (r'^(BIRD [0-9.]+ ready.|name\s+proto\s+table\s+.*)?$')
  107. # Parse session list
  108. protocol = None
  109. proto_dict = None
  110. for line in protocols.readlines ():
  111. line = line.strip ()
  112. # Preamble or empty string
  113. if ignore_re.search (line):
  114. protocol = None
  115. proto_dict = None
  116. continue
  117. # Start of a new protocol
  118. match = proto_re.search (line)
  119. if match:
  120. protocol = match.group (1)
  121. bgp_sessions[protocol] = {}
  122. proto_dict = bgp_sessions[protocol]
  123. continue
  124. # Ignore any non-BGP protocols, empty lines, etc.
  125. if protocol == None:
  126. continue
  127. # Parse and store any interesting lines / fields
  128. for field, config in fields.items ():
  129. match = config['re'].search (line)
  130. if not match:
  131. continue
  132. # Get values from match
  133. values = []
  134. for group in config['groups']:
  135. values.append (match.group (group))
  136. # Store entries separately?
  137. mangle_dict = config.get ('mangle_dict', None)
  138. if mangle_dict:
  139. for entry, group in mangle_dict.items ():
  140. proto_dict[entry] = match.group (group)
  141. # Store as list?
  142. if config.get ('list', False) == True:
  143. proto_dict[field] = config['split'] (match.group (1))
  144. # Store as string
  145. else:
  146. proto_dict[field] = " ".join (values)
  147. ################################################################################
  148. # Check the status quo #
  149. ################################################################################
  150. up = []
  151. down = []
  152. ret_code = 0
  153. down_by_proto = {
  154. 'ibgp' : [],
  155. 'ebgp' : []
  156. }
  157. for protoname, config in sorted (bgp_sessions.items ()):
  158. # Skip iBGP/eBGP sessions when not asked to check them
  159. session_args = config.get ('Session', [])
  160. if (args.ibgp != True and (('internal' in session_args) or (config['Neighbor AS'] == args.asn))) or \
  161. (args.ebgp != True and (('external' in session_args) or (config['Neighbor AS'] != args.asn))):
  162. continue
  163. session_type = "ibgp"
  164. if ('external' in session_args) or (config['Neighbor AS'] != args.asn):
  165. session_type = "ebgp"
  166. remote_as = "I" if session_type == "ibgp" else config.get ('Neighbor AS')
  167. session_desc = "%s/%s" % (protoname, remote_as)
  168. bgp_state = config['BGP state']
  169. if bgp_state == 'Established':
  170. up.append (session_desc)
  171. # Session disable and we don't care
  172. elif bgp_state == 'Down' and args.disabled_ok:
  173. up.append (session_desc + " (Disabled)")
  174. # Something's broken
  175. else:
  176. last_error = 'Disabled' if bgp_state == 'Down' else config.get ('Last error', 'unkown')
  177. session_desc += " (%s)" % last_error
  178. down.append (session_desc)
  179. down_by_proto[session_type].append (session_desc)
  180. for proto, sessions in down_by_proto.items ():
  181. down_sessions = len (sessions)
  182. if down_sessions == 0:
  183. continue
  184. for level in [ 'w', 'c' ]:
  185. limits = getattr (args, "%s_%s" % (proto, level)).split (":")
  186. code = session_down_codes[level]
  187. # Check if
  188. if (limits[0] == '' or down_sessions >= int (limits[0])) and \
  189. (limits[1] == '' or down_sessions <= int (limits[1])):
  190. if ret_code < code:
  191. ret_code = code
  192. if len (down) > 0:
  193. print "DOWN: %s" % ", ".join (down)
  194. if len (up) > 0:
  195. print "OK: %s" % ", ".join (up)
  196. sys.exit (ret_code)