check_syncrepl_extended 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673
  1. #!/usr/bin/env python3
  2. #
# Script to check the LDAP syncrepl replication state between two servers.
# One server is considered the provider and the other the consumer.
  5. #
# This script can check the replication state with two methods:
# - with the first, the entryCSN of every entry of the LDAP directory is
# compared between the two servers
# - with the second, all values of all attributes of all entries are
# compared between the two servers.
  11. #
# In all cases, the contextCSN of the servers is compared, and entries
# missing on the consumer or on the provider are reported. You can
# disable the contextCSN verification with the --no-check-contextCSN argument.
  15. #
# This script is also able to "touch" an LDAP object on the provider to force
# synchronisation of this object. This mechanism consists in adding the
# '%%TOUCH%%' value to an attribute of this object and removing it just after.
# The touched attribute is specified with the --touch parameter. Of course,
# the DN and password provided must have write access to this attribute.
  21. #
# If you prefer, you can use the --replace-touch parameter to replace the
# value of the touched attribute instead of adding the touch value. Useful
# for single-valued attributes.
  25. #
# This script can be used as a Nagios plugin (-n argument)
  27. #
# Requirement:
# A single DN and password pair able to connect to both servers
# and to retrieve objects from them without restriction.
  31. #
  32. # Author: Benjamin Renard <brenard@easter-eggs.com>
  33. # Source: https://gitea.zionetrix.net/bn8/check_syncrepl_extended
  34. # License: GPLv3
  35. #
  36. import argparse
  37. import logging
  38. import sys
  39. import getpass
  40. import ldap
  41. from ldap import LDAPError # pylint: disable=no-name-in-module
  42. from ldap.controls import SimplePagedResultsControl
  43. from ldap import modlist
  44. VERSION = '0.0'
  45. TOUCH_VALUE = b'%%TOUCH%%'
  46. parser = argparse.ArgumentParser(
  47. description=(
  48. "Script to check LDAP syncrepl replication state between "
  49. "two servers."),
  50. epilog=(
  51. 'Author: Benjamin Renard <brenard@easter-eggs.com>, '
  52. f'Version: {VERSION}, '
  53. 'Source: https://gitea.zionetrix.net/bn8/check_syncrepl_extended')
  54. )
  55. parser.add_argument(
  56. "-p", "--provider",
  57. dest="provider",
  58. action="store",
  59. type=str,
  60. help="LDAP provider URI (example: ldaps://ldapmaster.foo:636)"
  61. )
  62. parser.add_argument(
  63. "-c", "--consumer",
  64. dest="consumer",
  65. action="store",
  66. type=str,
  67. help="LDAP consumer URI (example: ldaps://ldapslave.foo:636)"
  68. )
  69. parser.add_argument(
  70. "-i", "--serverID",
  71. dest="serverid",
  72. action="store",
  73. type=int,
  74. help=(
  75. "Compare contextCSN of a specific master. Useful in MultiMaster "
  76. "setups where each master has a unique ID and a contextCSN for "
  77. "each replicated master exists. A valid serverID is a integer "
  78. "value from 0 to 4095 (limited to 3 hex digits, example: '12' "
  79. "compares the contextCSN matching '#00C#')"),
  80. default=False
  81. )
  82. parser.add_argument(
  83. "-T", "--starttls",
  84. dest="starttls",
  85. action="store_true",
  86. help="Start TLS on LDAP provider/consumers connections",
  87. default=False
  88. )
  89. parser.add_argument(
  90. "-D", "--dn",
  91. dest="dn",
  92. action="store",
  93. type=str,
  94. help="LDAP bind DN (example: uid=nagios,ou=sysaccounts,o=example"
  95. )
  96. parser.add_argument(
  97. "-P", "--pwd",
  98. dest="pwd",
  99. action="store",
  100. type=str,
  101. help="LDAP bind password",
  102. default=None
  103. )
  104. parser.add_argument(
  105. "-b", "--basedn",
  106. dest="basedn",
  107. action="store",
  108. type=str,
  109. help="LDAP base DN (example: o=example)"
  110. )
  111. parser.add_argument(
  112. "-f", "--filter",
  113. dest="filterstr",
  114. action="store",
  115. type=str,
  116. help="LDAP filter (default: (objectClass=*))",
  117. default='(objectClass=*)'
  118. )
  119. parser.add_argument(
  120. "-d", "--debug",
  121. dest="debug",
  122. action="store_true",
  123. help="Debug mode",
  124. default=False
  125. )
  126. parser.add_argument(
  127. "-n", "--nagios",
  128. dest="nagios",
  129. action="store_true",
  130. help="Nagios check plugin mode",
  131. default=False
  132. )
  133. parser.add_argument(
  134. "-q", "--quiet",
  135. dest="quiet",
  136. action="store_true",
  137. help="Quiet mode",
  138. default=False
  139. )
  140. parser.add_argument(
  141. "--no-check-certificate",
  142. dest="nocheckcert",
  143. action="store_true",
  144. help="Don't check the server certificate (Default: False)",
  145. default=False
  146. )
  147. parser.add_argument(
  148. "--no-check-contextCSN",
  149. dest="nocheckcontextcsn",
  150. action="store_true",
  151. help="Don't check servers contextCSN (Default: False)",
  152. default=False
  153. )
  154. parser.add_argument(
  155. "--only-check-contextCSN",
  156. dest="onlycheckcontextcsn",
  157. action="store_true",
  158. help=(
  159. "Only check servers root contextCSN (objects check disabled, "
  160. "default : False)"),
  161. default=False
  162. )
  163. parser.add_argument(
  164. "-a", "--attributes",
  165. dest="attrs",
  166. action="store_true",
  167. help="Check attributes values (Default: check only entryCSN)",
  168. default=False
  169. )
  170. parser.add_argument(
  171. "--exclude-attributes",
  172. dest="excl_attrs",
  173. action="store",
  174. type=str,
  175. help="Don't check this attribut (only in attribute check mode)",
  176. default=None
  177. )
  178. parser.add_argument(
  179. "--touch",
  180. dest="touch",
  181. action="store",
  182. type=str,
  183. help=(
  184. 'Touch attribute giving in parameter to force resync a this LDAP '
  185. f'object from provider. A value "{TOUCH_VALUE.decode()}" will be '
  186. 'add to this attribute and remove after. The user use to connect '
  187. 'to the LDAP directory must have write permission on this '
  188. 'attribute on each object.'
  189. ),
  190. default=None
  191. )
  192. parser.add_argument(
  193. "--replace-touch",
  194. dest="replacetouch",
  195. action="store_true",
  196. help="In touch mode, replace value instead of adding.",
  197. default=False
  198. )
  199. parser.add_argument(
  200. "--remove-touch-value",
  201. dest="removetouchvalue",
  202. action="store_true",
  203. help="In touch mode, remove touch value if present.",
  204. default=False
  205. )
  206. parser.add_argument(
  207. "--page-size",
  208. dest="page_size",
  209. action="store",
  210. type=int,
  211. help=(
  212. "Page size: if defined, paging control using LDAP v3 extended "
  213. "control will be enabled."),
  214. default=None
  215. )
  216. options = parser.parse_args()
  217. if options.nocheckcontextcsn and options.onlycheckcontextcsn:
  218. parser.error(
  219. "You can't use both --no-check-contextCSN and "
  220. "--only-check-contextCSN parameters and the same time")
  221. if options.nagios:
  222. sys.exit(3)
  223. sys.exit(1)
  224. if not options.provider or not options.consumer:
  225. parser.error("You must provide provider and customer URI")
  226. if options.nagios:
  227. sys.exit(3)
  228. sys.exit(1)
  229. if not options.basedn:
  230. parser.error("You must provide base DN of connection to LDAP servers")
  231. if options.nagios:
  232. sys.exit(3)
  233. sys.exit(1)
  234. if not 0 <= options.serverid <= 4095:
  235. parser.error(
  236. "ServerID should be a integer value from 0 to 4095 "
  237. "(limited to 3 hexadecimal digits).")
  238. if options.nagios:
  239. sys.exit(3)
  240. sys.exit(1)
  241. if options.touch and not options.attrs:
  242. logging.info('Force option attrs on touch mode')
  243. options.attrs = True
  244. if options.dn and options.pwd is None:
  245. options.pwd = getpass.getpass()
  246. excl_attrs = []
  247. if options.excl_attrs:
  248. for ex in options.excl_attrs.split(','):
  249. excl_attrs.append(ex.strip())
  250. FORMAT = "%(asctime)s - %(levelname)s: %(message)s"
  251. if options.debug:
  252. logging.basicConfig(level=logging.DEBUG, format=FORMAT)
  253. ldap.set_option(ldap.OPT_DEBUG_LEVEL, 0) # pylint: disable=no-member
  254. elif options.nagios:
  255. logging.basicConfig(level=logging.ERROR, format=FORMAT)
  256. elif options.quiet:
  257. logging.basicConfig(level=logging.WARNING, format=FORMAT)
  258. else:
  259. logging.basicConfig(level=logging.INFO, format=FORMAT)
  260. class LdapServer:
  261. uri = ""
  262. dn = ""
  263. pwd = ""
  264. start_tls = False
  265. con = 0
  266. def __init__(self, uri, dn, pwd, start_tls=False, page_size=None):
  267. self.uri = uri
  268. self.dn = dn
  269. self.pwd = pwd
  270. self.start_tls = start_tls
  271. self.page_size = page_size
  272. def connect(self):
  273. if self.con == 0:
  274. try:
  275. con = ldap.initialize(self.uri)
  276. # pylint: disable=no-member
  277. con.protocol_version = ldap.VERSION3
  278. if self.start_tls:
  279. con.start_tls_s()
  280. if self.dn:
  281. con.simple_bind_s(self.dn, self.pwd)
  282. self.con = con
  283. except LDAPError:
  284. logging.error("LDAP Error", exc_info=True)
  285. return False
  286. return True
  287. def getContextCSN(self, basedn=False, serverid=False):
  288. if not basedn:
  289. basedn = self.dn
  290. data = self.search(
  291. basedn, '(objectclass=*)', attrs=['contextCSN'], scope='base')
  292. if data:
  293. contextCSNs = data[0][0][1]['contextCSN']
  294. logging.debug('Found contextCSNs %s', contextCSNs)
  295. if serverid is False:
  296. return contextCSNs[0]
  297. csnid = str(format(serverid, 'X')).zfill(3)
  298. sub = str.encode(f'#{csnid}#', encoding="ascii", errors="replace")
  299. CSN = [s for s in contextCSNs if sub in s]
  300. if not CSN:
  301. logging.error(
  302. "No contextCSN matching with ServerID %s (=%s) could be "
  303. "found.",
  304. serverid, sub
  305. )
  306. return False
  307. return CSN[0]
  308. return False
  309. @staticmethod
  310. def get_scope(scope):
  311. if scope == 'base':
  312. return ldap.SCOPE_BASE # pylint: disable=no-member
  313. if scope == 'one':
  314. return ldap.SCOPE_ONELEVEL # pylint: disable=no-member
  315. if scope == 'sub':
  316. return ldap.SCOPE_SUBTREE # pylint: disable=no-member
  317. raise Exception(f'Unknown LDAP scope "{scope}"')
  318. def search(self, basedn, filterstr, attrs=None, scope=None):
  319. if self.page_size:
  320. return self.paged_search(
  321. basedn, filterstr, attrs=attrs, scope=scope)
  322. res_id = self.con.search(
  323. basedn, self.get_scope(scope if scope else 'sub'),
  324. filterstr, attrs if attrs else []
  325. )
  326. ret = []
  327. while 1:
  328. res_type, res_data = self.con.result(res_id, 0)
  329. if res_data == []:
  330. break
  331. if res_type == ldap.RES_SEARCH_ENTRY: # pylint: disable=no-member
  332. ret.append(res_data)
  333. return ret
  334. def paged_search(self, basedn, filterstr, attrs=None, scope=None):
  335. ret = []
  336. page = 0
  337. pg_ctrl = SimplePagedResultsControl(True, self.page_size, '')
  338. while page == 0 or pg_ctrl.cookie:
  339. page += 1
  340. logging.debug('Page search: loading page %d', page)
  341. res_id = self.con.search_ext(
  342. basedn, self.get_scope(scope if scope else 'sub'),
  343. filterstr, attrs if attrs else [], serverctrls=[pg_ctrl]
  344. )
  345. # pylint: disable=unused-variable
  346. res_type, res_data, res_id, serverctrls = self.con.result3(res_id)
  347. for serverctrl in serverctrls:
  348. if serverctrl.controlType == SimplePagedResultsControl.controlType:
  349. pg_ctrl.cookie = serverctrl.cookie
  350. break
  351. for item in res_data:
  352. ret.append([item])
  353. return ret
  354. def update_object(self, dn, old, new):
  355. ldif = modlist.modifyModlist(old, new)
  356. if not ldif:
  357. return True
  358. try:
  359. logging.debug('Update object %s: %s', dn, ldif)
  360. self.con.modify_s(dn, ldif)
  361. return True
  362. except LDAPError:
  363. logging.error('Error updating object %s', dn, exc_info=True)
  364. return False
  365. @staticmethod
  366. def get_attr(obj, attr):
  367. if attr in obj[0][1]:
  368. return obj[0][1][attr]
  369. return []
  370. def touch_object(self, dn, attr, orig_value):
  371. old = {}
  372. if orig_value:
  373. old[attr] = orig_value
  374. new = {}
  375. if options.replacetouch:
  376. if not orig_value or TOUCH_VALUE not in orig_value:
  377. new[attr] = [TOUCH_VALUE]
  378. else:
  379. new[attr] = list(orig_value)
  380. if orig_value or TOUCH_VALUE in orig_value:
  381. new[attr].remove(TOUCH_VALUE)
  382. else:
  383. new[attr].append(TOUCH_VALUE)
  384. try:
  385. logging.info(
  386. 'Touch object "%s" on attribute "%s": %s => %s',
  387. dn, attr, old, new
  388. )
  389. if self.update_object(dn, old, new):
  390. logging.info(
  391. 'Restore original value of attribute "%s" of object "%s"',
  392. attr, dn)
  393. if options.removetouchvalue and TOUCH_VALUE in old[attr]:
  394. old[attr].remove(TOUCH_VALUE)
  395. self.update_object(dn=dn, old=new, new=old)
  396. return True
  397. except LDAPError:
  398. logging.error('Error touching object "%s"', dn, exc_info=True)
  399. return False
  400. if options.nocheckcert:
  401. # pylint: disable=no-member
  402. ldap.set_option(
  403. ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER)
  404. servers = [options.provider, options.consumer]
  405. LdapServers = {}
  406. LdapObjects = {}
  407. LdapServersCSN = {}
  408. for srv in servers:
  409. logging.info('Connect to %s', srv)
  410. LdapServers[srv] = LdapServer(srv, options.dn, options.pwd,
  411. options.starttls,
  412. page_size=options.page_size)
  413. if not LdapServers[srv].connect():
  414. if options.nagios:
  415. print(f'UNKWNON - Failed to connect to {srv}')
  416. sys.exit(3)
  417. else:
  418. sys.exit(1)
  419. if not options.nocheckcontextcsn:
  420. LdapServersCSN[srv] = LdapServers[srv].getContextCSN(
  421. options.basedn, options.serverid)
  422. logging.info('ContextCSN of %s: %s', srv, LdapServersCSN[srv])
  423. if not options.onlycheckcontextcsn:
  424. logging.info('List objects from %s', srv)
  425. LdapObjects[srv] = {}
  426. if options.attrs:
  427. for obj in LdapServers[srv].search(
  428. options.basedn, options.filterstr, []
  429. ):
  430. logging.debug('Found on %s: %s', srv, obj[0][0])
  431. LdapObjects[srv][obj[0][0]] = obj[0][1]
  432. else:
  433. for obj in LdapServers[srv].search(
  434. options.basedn, options.filterstr, ['entryCSN']
  435. ):
  436. logging.debug(
  437. 'Found on %s: %s / %s',
  438. srv, obj[0][0], obj[0][1]['entryCSN'][0]
  439. )
  440. LdapObjects[srv][obj[0][0]] = obj[0][1]['entryCSN'][0]
  441. logging.info('%s objects founds', len(LdapObjects[srv]))
  442. if not options.onlycheckcontextcsn:
  443. not_found = {}
  444. not_sync = {}
  445. for srv in servers:
  446. not_found[srv] = []
  447. not_sync[srv] = []
  448. if options.attrs:
  449. logging.info(
  450. "Check if objects a are synchronized (by comparing attributes's "
  451. "values)")
  452. else:
  453. logging.info(
  454. 'Check if objets are synchronized (by comparing entryCSN)')
  455. for obj in LdapObjects[options.provider]:
  456. logging.debug('Check obj %s', obj)
  457. for srv_name, srv in LdapObjects.items():
  458. if srv_name == options.provider:
  459. continue
  460. if obj in srv:
  461. touch = False
  462. if LdapObjects[options.provider][obj] != srv[obj]:
  463. if options.attrs:
  464. attrs_list = []
  465. for attr in LdapObjects[options.provider][obj]:
  466. if attr in excl_attrs:
  467. continue
  468. if attr not in srv[obj]:
  469. attrs_list.append(attr)
  470. logging.debug(
  471. "Obj %s not synchronized: %s not present on %s",
  472. obj, ','.join(attrs_list), srv_name
  473. )
  474. touch = True
  475. else:
  476. srv[obj][attr].sort()
  477. LdapObjects[options.provider][obj][attr].sort()
  478. if srv[obj][attr] != LdapObjects[options.provider][obj][attr]:
  479. attrs_list.append(attr)
  480. logging.debug(
  481. "Obj %s not synchronized: %s not same value(s)",
  482. obj, ','.join(attrs_list)
  483. )
  484. touch = True
  485. if attrs_list:
  486. not_sync[srv_name].append(f'{obj} ({",".join(attrs_list)})')
  487. else:
  488. logging.debug(
  489. "Obj %s not synchronized: %s <-> %s",
  490. obj, LdapObjects[options.provider][obj], srv[obj]
  491. )
  492. not_sync[srv_name].append(obj)
  493. if touch and options.touch:
  494. orig_value = []
  495. if options.touch in LdapObjects[options.provider][obj]:
  496. orig_value = LdapObjects[options.provider][obj][options.touch]
  497. LdapServers[options.provider].touch_object(
  498. obj, options.touch, orig_value)
  499. else:
  500. logging.debug('Obj %s: not found on %s', obj, srv_name)
  501. not_found[srv_name].append(obj)
  502. if options.touch:
  503. orig_value = []
  504. if options.touch in LdapObjects[options.provider][obj]:
  505. orig_value = LdapObjects[options.provider][obj][options.touch]
  506. LdapServers[options.provider].touch_object(
  507. obj, options.touch, orig_value)
  508. for obj in LdapObjects[options.consumer]:
  509. logging.debug('Check obj %s of consumer', obj)
  510. if obj not in LdapObjects[options.provider]:
  511. logging.debug('Obj %s: not found on provider', obj)
  512. not_found[options.provider].append(obj)
  513. if options.nagios:
  514. errors = []
  515. long_output = []
  516. if not options.nocheckcontextcsn:
  517. if not LdapServersCSN[options.provider]:
  518. errors.append('ContextCSN of LDAP server provider could not be found')
  519. else:
  520. long_output.append(
  521. f'ContextCSN on LDAP server provider = {LdapServersCSN[options.provider]}')
  522. for srv_name, srv_csn in LdapServersCSN.items():
  523. if srv_name == options.provider:
  524. continue
  525. if not srv_csn:
  526. errors.append(f'ContextCSN of {srv_name} not found')
  527. elif srv_csn != LdapServersCSN[options.provider]:
  528. errors.append(
  529. f'ContextCSN of {srv_name} not the same of provider')
  530. long_output.append(
  531. f'ContextCSN on LDAP server {srv_name} = {srv_csn}')
  532. if not options.onlycheckcontextcsn:
  533. if not_found[options.consumer]:
  534. errors.append(
  535. f'{len(not_found[options.consumer])} not found object(s) on '
  536. 'consumer')
  537. long_output.append(
  538. f'Object(s) not found on server {options.consumer} '
  539. '(consumer):')
  540. for obj in not_found[options.consumer]:
  541. long_output.append(f' - {obj}')
  542. if not_found[options.provider]:
  543. errors.append(
  544. f'{len(not_found[options.provider])} not found object(s) on '
  545. 'provider')
  546. long_output.append(
  547. f'Object(s) not found on server {options.provider} '
  548. '(provider):')
  549. for obj in not_found[options.provider]:
  550. long_output.append(f' - {obj}')
  551. if not_sync[options.consumer]:
  552. errors.append(
  553. f'{len(not_sync[options.consumer])} not synchronized object(s) '
  554. 'on consumer')
  555. long_output.append(
  556. f'Object(s) not synchronized on server {options.consumer} '
  557. '(consumer):')
  558. for obj in not_sync[options.consumer]:
  559. long_output.append(f' - {obj}')
  560. if errors:
  561. print(f'CRITICAL: {", ".join(errors)}')
  562. print('\n\n')
  563. print("\n".join(long_output))
  564. sys.exit(2)
  565. else:
  566. print('OK: consumer and provider are synchronized')
  567. sys.exit(0)
  568. else:
  569. noerror = True
  570. for srv in servers:
  571. if not options.nocheckcontextcsn:
  572. if not LdapServersCSN[options.provider]:
  573. logging.warning(
  574. 'ContextCSN of LDAP server provider could not be found')
  575. noerror = False
  576. else:
  577. for srv_name, srv_csn in LdapServersCSN.items():
  578. if srv_name == options.provider:
  579. continue
  580. if not srv_csn:
  581. logging.warning('ContextCSN of %s not found', srv_name)
  582. noerror = False
  583. elif srv_csn != LdapServersCSN[options.provider]:
  584. logging.warning(
  585. 'ContextCSN of %s not the same of provider',
  586. srv_name)
  587. noerror = False
  588. if not options.onlycheckcontextcsn:
  589. if not_found[srv]:
  590. logging.warning(
  591. 'Not found objects on %s :\n - %s',
  592. srv, '\n - '.join(not_found[srv])
  593. )
  594. noerror = False
  595. if not_sync[srv]:
  596. logging.warning(
  597. 'Not sync objects on %s: %s',
  598. srv, '\n - '.join(not_sync[srv])
  599. )
  600. noerror = False
  601. if noerror:
  602. logging.info('No sync problem detected')