From 23ba1b01ac2beb5ee26919cce2e056ce4375579f Mon Sep 17 00:00:00 2001 From: Russell C Kendall Date: Thu, 7 Jul 2022 16:45:32 -0500 Subject: [PATCH 1/7] initial rename arg parsing and geo by region and city use bitdiscovery api to add ip ranges to inventory complete README --- .gitignore | 133 ++++++++++++++++++++++++++++++++++++++++++ README.md | 40 +++++++++++++ add-by-geo.py | 149 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 + 4 files changed, 324 insertions(+) create mode 100644 .gitignore create mode 100644 add-by-geo.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fa4b71e --- /dev/null +++ b/.gitignore @@ -0,0 +1,133 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +venv +ip2loc/* + diff --git a/README.md b/README.md index 20ee63e..92e67d3 100644 --- a/README.md +++ b/README.md @@ -94,3 +94,43 @@ Delete an IP or a source (with its id) from the given inventory: python3 delete-ip.py ip 1.1.1.1 $APIKEY python3 delete-ip.py source 13 $APIKEY ``` + +## Add IP by geography + +The `add-by-geo.py` script adds country-specific IP sources to your inventory based on the IP2Location LITE database. + +__Note: this may add a huge number of sources. It is strongly recommended to first run this script__ +__without the --add-to-inventory parameter to preview the IP ranges it would add.__ + +### Usage + +First, register for an account with IP2Location LITE - http://lite.ip2location.com + +Second, download the IP2Location LITE database (IPv6) CSV files and place them in the `./ip2loc/` folder. + +Third, install the dependencies: + +```shell +pip install argparse requests sortedcontainers +``` + +Finally, update `APIURL` in the script and obtain a single-inventory API key to use as the argument to `--apikey`. + +Add to inventory all IP ranges for one country (`-f COUNTRY_CODE=UA`), IPv4 only (`-4`), as sources: + +```shell +python3 add-by-geo.py -4 -f COUNTRY_CODE=UA --add-to-inventory --apikey=asdfasdf +``` + +It is an error to use `--add-to-inventory` without a valid, single-inventory API key. 
+ +Output, but do not add to inventory all IP ranges corresponding to Los Angeles in country=US: + +```shell +python3 add-by-geo.py -f COUNTRY_CODE=US -f CITY="Los Angeles" +``` + +Output, but do not add to inventory all IP ranges in any country, region, or city named "Dallas": +```shell +python3 add-by-geo.py -f ANY="Dallas" +``` \ No newline at end of file diff --git a/add-by-geo.py b/add-by-geo.py new file mode 100644 index 0000000..e8af4ff --- /dev/null +++ b/add-by-geo.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +import argparse +import csv +from enum import Enum +import ipaddress +import operator +from sortedcontainers import SortedList +from typing import List, Sequence + +from bitdiscovery.api import BitDiscoveryApi, try_multiple_times + +ZERO_IP_ADDRESS = ipaddress.ip_address(0) +LOC_DB_PATH: str = ("ip2loc/IP2LOCATION-LITE-DB3.IPV6.CSV",) +APIURL = "https://asm-demo.cloud.tenable.com/api/1.0" + + +class FILTER_CRITERIA(Enum): + ANY = 'ANY' + COUNTRY_CODE = 'COUNTRY_CODE' + COUNTRY_NAME = 'COUNTRY_NAME' + REGION = 'REGION' + CITY = 'CITY' + + def __str__(self): + return self.value + + def __int__(self): + return ['PLACEHOLDER', 'ANY', 'COUNTRY_CODE', 'COUNTRY_NAME', 'REGION', 'CITY'].index(self.value) + + +def filter_row(row: List(str), filters: dict[FILTER_CRITERIA, str]) -> bool: + + if FILTER_CRITERIA.ANY.value in filters.keys() and filters[FILTER_CRITERIA.ANY.value].upper() in map(str.upper, row): + return True + + for filter, argument in filters.items(): + if filter == FILTER_CRITERIA.ANY: + pass + if row[int(FILTER_CRITERIA(filter))].upper() != argument.upper(): + return False + return True + + +def sort_input(row: str, ipv4_only: bool) -> None: + start, end, *rest = row + + try: + start = int(start) + end = int(end) + except: + raise + + if ipaddress.ip_address(start).ipv4_mapped: + start = int(ipaddress.ip_address(start).ipv4_mapped) + end = int(ipaddress.ip_address(end).ipv4_mapped) + + if ipv4_only and 6 == 
ipaddress.ip_address(start).version: + return + + global ip_ranges + ip_ranges.add((start, end)) + + +class filterValue(argparse.Action): + # Constructor calling + def __call__(self, parser, namespace, + values, option_string=None): + if not getattr(namespace, self.dest): + setattr(namespace, self.dest, dict()) + for value in values: + # split it into key and value + key, value = value.split('=') + # assign into dictionary + if key in FILTER_CRITERIA._member_names_: + getattr(namespace, self.dest)[key] = value + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser( + "Get IP ranges by geography and optionally add to ASM inventory") + + parser.add_argument("-4", "--ipv4_only", + action="store_true", default=False, help="Do not report IPv6 addresses") + parser.add_argument("--add-to-inventory", action="store_true", + default=False, help="Required to add sources to inventory") + parser.add_argument('--apikey', metavar="APIKEY", type=str, + help="Your Bit Discovery API key. 
Must be for a single inventory") + parser.add_argument('-f', '--filter', + nargs='*', + action=filterValue, + help=f"Multiple Key=Value criteria where Key is in {FILTER_CRITERIA._member_names_}", + ) + args = parser.parse_args(argv) + + print(args) + + assert(bool(args.add_to_inventory) == bool(args.apikey)) + + retv = 0 + global ip_ranges + ip_ranges = SortedList(key=operator.itemgetter(0)) + for file in LOC_DB_PATH: + + with open(file, newline="") as f: + reader = csv.reader(f, delimiter=",", quotechar='"') + for row in reader: + if filter_row(row, args.filter): + sort_input(row, args.ipv4_only) + + # apply interval merge algorithm on sorted input + if not ip_ranges: + return retv + + resa = [] + resa.append(ip_ranges[0]) + for i in range(len(ip_ranges)): + + if ip_ranges[i][0] <= resa[-1][1]+1: + resa[-1] = (resa[-1][0], max(ip_ranges[i][1], resa[-1][1])) + else: + resa.append(ip_ranges[i]) + + api = None + if args.add_to_inventory: + api = BitDiscoveryApi(APIURL, args.apikey) + inventories = api.find_inventories(0,0) + assert(inventories.json.code == 200) + + for i, (start, end) in enumerate(resa): + ip_range = f"{str(ipaddress.ip_address(start))}-{str(ipaddress.ip_address(end))}" + if args.add_to_inventory: + + result = try_multiple_times( + lambda: api.add_ip(ip_range), + max_tries=5 + ) + if result is None: + print(f"API call failed too many times for {ip_range}") + + else: + print( + f"{i:20} {ip_range}") + + return retv + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/requirements.txt b/requirements.txt index 7993b77..63c2156 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +argparse==1.4.0 certifi==2020.12.5 chardet==4.0.0 cycler==0.10.0 @@ -19,6 +20,7 @@ pytz==2020.5 requests==2.25.1 sh==1.14.1 six==1.15.0 +sortedcontainers==2.4.0 tld==0.12.5 urllib3==1.26.2 zope.interface==5.2.0 From a336f9a72b89333cac7fc417bb1d57c21912bc7c Mon Sep 17 00:00:00 2001 From: Russell C Kendall Date: Mon, 18 Jul 2022 11:52:30 -0500 
Subject: [PATCH 2/7] Update inventory handling --- add-by-geo.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/add-by-geo.py b/add-by-geo.py index e8af4ff..3333e1d 100644 --- a/add-by-geo.py +++ b/add-by-geo.py @@ -124,20 +124,23 @@ def main(argv: Sequence[str] | None = None) -> int: api = None if args.add_to_inventory: api = BitDiscoveryApi(APIURL, args.apikey) - inventories = api.find_inventories(0,0) - assert(inventories.json.code == 200) + inventories = api.find_inventories(0, 0) + + assert(inventories['code'] == 400) + assert(inventories['message'] == + 'Your API access is limited to a single inventory.') for i, (start, end) in enumerate(resa): ip_range = f"{str(ipaddress.ip_address(start))}-{str(ipaddress.ip_address(end))}" if args.add_to_inventory: - + result = try_multiple_times( lambda: api.add_ip(ip_range), max_tries=5 ) if result is None: print(f"API call failed too many times for {ip_range}") - + else: print( f"{i:20} {ip_range}") From f783fdff2f4101489c6dff1fb577cc2181fc64bd Mon Sep 17 00:00:00 2001 From: Russell C Kendall Date: Wed, 20 Jul 2022 20:22:07 -0500 Subject: [PATCH 3/7] SCCP-45 added capped exponential back off to throttle --- add-by-geo.py | 19 +++++++++++++++++-- test/TEST.CSV | 4 ++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 test/TEST.CSV diff --git a/add-by-geo.py b/add-by-geo.py index 3333e1d..9177311 100644 --- a/add-by-geo.py +++ b/add-by-geo.py @@ -6,10 +6,15 @@ import ipaddress import operator from sortedcontainers import SortedList +import time from typing import List, Sequence from bitdiscovery.api import BitDiscoveryApi, try_multiple_times +# max time to wait for the API to update based on loaded ip_range +# after which the next ip_range will be added, this is throttling behavior. 
+MAX_WAIT_TIME = 7200 + ZERO_IP_ADDRESS = ipaddress.ip_address(0) LOC_DB_PATH: str = ("ip2loc/IP2LOCATION-LITE-DB3.IPV6.CSV",) APIURL = "https://asm-demo.cloud.tenable.com/api/1.0" @@ -125,7 +130,7 @@ def main(argv: Sequence[str] | None = None) -> int: if args.add_to_inventory: api = BitDiscoveryApi(APIURL, args.apikey) inventories = api.find_inventories(0, 0) - + assert(inventories['code'] == 400) assert(inventories['message'] == 'Your API access is limited to a single inventory.') @@ -141,9 +146,19 @@ def main(argv: Sequence[str] | None = None) -> int: if result is None: print(f"API call failed too many times for {ip_range}") + elapsed_wait, backoff_delay = 0, 1 # 1 second + + while elapsed_wait < MAX_WAIT_TIME \ + and api.search_for_source(0, 0, ip_range)['searches'][0]['dbdata'] == None: + # backoff, blocking wait for API to consume the ip range source + print(f'{ip_range}: max wait time remaining: {MAX_WAIT_TIME - elapsed_wait} s') + time.sleep(backoff_delay) + elapsed_wait += backoff_delay + backoff_delay = min(514, backoff_delay << 1) + else: print( - f"{i:20} {ip_range}") + f'{i:20} {ip_range}') return retv diff --git a/test/TEST.CSV b/test/TEST.CSV new file mode 100644 index 0000000..8f4a45c --- /dev/null +++ b/test/TEST.CSV @@ -0,0 +1,4 @@ +"281471306069760","281471306069761","11","Country1","Region1","City1" +"281471306069762","281471306069763","11","Country1","Region1","City1" +"281471306069766","281471306069766","11","Country1","Region2","City3" +"281471306070528","281471306070528","22","Country2","Region2","City4" From 6507f63c430b813ca38584cf6fa4a5cb8c0e901a Mon Sep 17 00:00:00 2001 From: Russell C Kendall Date: Thu, 21 Jul 2022 11:14:58 -0500 Subject: [PATCH 4/7] backoff_delay at most is max time remaining --- add-by-geo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/add-by-geo.py b/add-by-geo.py index 9177311..95ed876 100644 --- a/add-by-geo.py +++ b/add-by-geo.py @@ -154,7 +154,7 @@ def main(argv: Sequence[str] | None = None) 
-> int: print(f'{ip_range}: max wait time remaining: {MAX_WAIT_TIME - elapsed_wait} s') time.sleep(backoff_delay) elapsed_wait += backoff_delay - backoff_delay = min(514, backoff_delay << 1) + backoff_delay = min(514, backoff_delay << 1, MAX_WAIT_TIME - elapsed_wait) else: print( From 785e806dfc26361ffd6f1f04d6340a36bdf980ac Mon Sep 17 00:00:00 2001 From: Russell C Kendall Date: Mon, 25 Jul 2022 13:13:55 -0500 Subject: [PATCH 5/7] Patch bug in interval algorithm --- add-by-geo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/add-by-geo.py b/add-by-geo.py index 95ed876..5d7fe3f 100644 --- a/add-by-geo.py +++ b/add-by-geo.py @@ -11,7 +11,7 @@ from bitdiscovery.api import BitDiscoveryApi, try_multiple_times -# max time to wait for the API to update based on loaded ip_range +# max time (seconds) to wait for the API to update based on loaded ip_range # after which the next ip_range will be added, this is throttling behavior. MAX_WAIT_TIME = 7200 @@ -121,7 +121,7 @@ def main(argv: Sequence[str] | None = None) -> int: resa.append(ip_ranges[0]) for i in range(len(ip_ranges)): - if ip_ranges[i][0] <= resa[-1][1]+1: + if ip_ranges[i][0] <= resa[i-1][1]+1: resa[-1] = (resa[-1][0], max(ip_ranges[i][1], resa[-1][1])) else: resa.append(ip_ranges[i]) From b618a62a9fece9280c18cdd6d5b679a7671c8b1a Mon Sep 17 00:00:00 2001 From: Russell C Kendall Date: Mon, 25 Jul 2022 13:14:14 -0500 Subject: [PATCH 6/7] data folder hide --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index fa4b71e..c9f04c0 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,4 @@ dmypy.json venv ip2loc/* +data/* \ No newline at end of file From 89cc101a96ea2c7149559183be4fd4d81a73ae6b Mon Sep 17 00:00:00 2001 From: Russell C Kendall Date: Mon, 25 Jul 2022 16:00:42 -0500 Subject: [PATCH 7/7] Roll back bad patch to interval logic --- add-by-geo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/add-by-geo.py b/add-by-geo.py index 
5d7fe3f..e05f374 100644 --- a/add-by-geo.py +++ b/add-by-geo.py @@ -121,7 +121,7 @@ def main(argv: Sequence[str] | None = None) -> int: resa.append(ip_ranges[0]) for i in range(len(ip_ranges)): - if ip_ranges[i][0] <= resa[i-1][1]+1: + if ip_ranges[i][0] <= resa[-1][1]+1: resa[-1] = (resa[-1][0], max(ip_ranges[i][1], resa[-1][1])) else: resa.append(ip_ranges[i])