Przeglądaj źródła

server: move ISP name normalization into separate function

Helge Jung 9 lat temu
rodzic
commit
f612f9bfd5
1 zmienionych plików z 36 dodań i 15 usunięć
  1. 36 15
      ffstatus/server.py

+ 36 - 15
ffstatus/server.py

@@ -14,6 +14,41 @@ import time
 
 import ffstatus
 
# Compiled patterns used to normalize an ISP's name: every match of every
# pattern is removed from the name.  All patterns are anchored at the end of
# the string ("$") and are case-insensitive.
#
# NOTE(review): the patterns are not anchored at a word boundary, so a name
# that merely *ends* in one of the suffixes is truncated as well (for example
# "Orange" ends in "range").  Confirm whether that is intended before
# tightening the patterns, since it would change the resulting names.
ISP_NORMALIZATIONS = [
    # normalize name: strip company indication
    re.compile(r'(AG|UG|G?mbH( ?& ?Co\.? ?(OH|K)G)?)$', flags=re.IGNORECASE),

    # normalize name: strip "pool" suffixes
    re.compile(r'(dynamic )?(customer |subscriber )?(ip )?(pool|(address )?range|addresses)$', flags=re.IGNORECASE),

    # normalize name: strip "B2B" and aggregation suffixes
    re.compile(r'(aggregate|aggregation)?$', flags=re.IGNORECASE),
    re.compile(r'(B2B)?$', flags=re.IGNORECASE),

    # normalize name: strip country and federal-state suffixes (in Germany)
    re.compile(
        r'('
        r'DE|Deutschland|Germany|'
        r'Nordrhein[- ]Westfalen|NRW|'
        # The state is spelled "Wuerttemberg"; the historical misspelling
        # "Wuerttemburg" is still accepted so earlier matches keep working.
        r'Baden[- ]Wuerttemb[eu]rg|BW|'
        r'Hessen|'
        r'Niedersachsen|'
        r'Rheinland[- ]Pfalz|RLP'
        r')$',
        flags=re.IGNORECASE,
    ),
]
+
+
def normalize_ispname(isp):
    """Return ``isp`` with every match of the ISP_NORMALIZATIONS patterns removed.

    The patterns are applied once each, in list order.  Surrounding
    whitespace is trimmed before the first pattern runs and again after
    every substitution, so the next pattern sees a clean end of string.
    """
    cleaned = isp.strip()

    for pattern in ISP_NORMALIZATIONS:
        without_match = pattern.sub('', cleaned)
        cleaned = without_match.strip()

    return cleaned
+
+
 class BatcaveHttpRequestHandler(BaseHTTPRequestHandler):
 
     def __init__(self, request, client_address, server):
@@ -360,22 +395,8 @@ class BatcaveHttpRequestHandler(BaseHTTPRequestHandler):
                 if isinstance(remote, dict):
                     ispblock = remote['name']
                     desc_lines = remote['description'].split('\n')
-                    isp = desc_lines[0].strip()
-
-                    # normalize name: strip company indication
-                    isp = re.sub(r'(AG|UG|G?mbH( ?& ?Co\.? ?(OH|K)G)?)$', '', isp, flags=re.IGNORECASE).strip()
-
-                    # normalize name: strip "pool" suffixes
-                    isp = re.sub(r'(dynamic )?(customer |subscriber )?(ip )?(pool|(address )?range|addresses)$', '', isp, flags=re.IGNORECASE).strip()
-
-                    # normalize name: strip "B2B" and aggregation suffixes
-                    isp = re.sub(r'(aggregate|aggregation)?$', '', isp, flags=re.IGNORECASE).strip()
-                    isp = re.sub(r'(B2B)?$', '', isp, flags=re.IGNORECASE).strip()
-
-                    # normalize name: strip country suffixes (in Germany)
-                    isp = re.sub(r'(DE|Deutschland|Germany|Nordrhein[- ]Westfalen|NRW|Baden[- ]Wuerttemburg|BW|Hessen|Niedersachsen|Rheinland[- ]Pfalz|RLP)$', '', isp, flags=re.IGNORECASE).strip()
+                    isp = normalize_ispname(desc_lines[0])
 
-                isp = str(isp)
                 if not isp in ispblocks:
                     ispblocks[isp] = set()
                 ispblocks[isp].add(ispblock)