From 24acdb7a8304c1936321454f95fdf2ee71b71a77 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 12:14:52 -0400 Subject: [PATCH 01/63] let there be dns engine --- bbot/core/config/__init__.py | 12 + bbot/core/config/logger.py | 36 +- bbot/core/core.py | 27 ++ bbot/core/engine.py | 189 ++++++++++ bbot/core/helpers/dns/__init__.py | 1 + bbot/core/helpers/dns/dns.py | 234 +++++++++++++ bbot/core/helpers/{dns.py => dns/engine.py} | 326 +++++++----------- bbot/core/helpers/dns/mock.py | 55 +++ bbot/core/helpers/misc.py | 27 ++ bbot/scanner/scanner.py | 8 +- bbot/test/bbot_fixtures.py | 70 +--- bbot/test/test_step_1/test_dns.py | 146 +++++--- .../test_step_1/test_manager_deduplication.py | 2 +- .../test_manager_scope_accuracy.py | 2 +- bbot/test/test_step_1/test_modules_basic.py | 3 +- bbot/test/test_step_1/test_scan.py | 4 +- bbot/test/test_step_2/module_tests/base.py | 4 +- .../module_tests/test_module_affiliates.py | 2 +- .../module_tests/test_module_aggregate.py | 2 +- .../test_module_asset_inventory.py | 2 +- .../module_tests/test_module_baddns.py | 4 +- .../module_tests/test_module_baddns_zone.py | 4 +- .../module_tests/test_module_dehashed.py | 2 +- .../module_tests/test_module_dnscommonsrv.py | 2 +- .../module_tests/test_module_internetdb.py | 2 +- .../module_tests/test_module_ipneighbor.py | 2 +- .../module_tests/test_module_postman.py | 2 +- .../module_tests/test_module_speculate.py | 2 +- 28 files changed, 823 insertions(+), 349 deletions(-) create mode 100644 bbot/core/engine.py create mode 100644 bbot/core/helpers/dns/__init__.py create mode 100644 bbot/core/helpers/dns/dns.py rename bbot/core/helpers/{dns.py => dns/engine.py} (78%) create mode 100644 bbot/core/helpers/dns/mock.py diff --git a/bbot/core/config/__init__.py b/bbot/core/config/__init__.py index e69de29bb2..c36d91f487 100644 --- a/bbot/core/config/__init__.py +++ b/bbot/core/config/__init__.py @@ -0,0 +1,12 @@ +import sys +import multiprocessing as mp + +try: + 
mp.set_start_method("spawn") +except Exception: + start_method = mp.get_start_method() + if start_method != "spawn": + print( + f"[WARN] Multiprocessing spawn method is set to {start_method}. This may negatively affect performance.", + file=sys.stderr, + ) diff --git a/bbot/core/config/logger.py b/bbot/core/config/logger.py index a4063027b8..3844a65fcf 100644 --- a/bbot/core/config/logger.py +++ b/bbot/core/config/logger.py @@ -1,6 +1,8 @@ import sys +import atexit import logging from copy import copy +import multiprocessing import logging.handlers from pathlib import Path @@ -60,13 +62,29 @@ def __init__(self, core): self.core_logger = logging.getLogger("bbot") self.core = core - # Don't do this more than once - if len(self.core_logger.handlers) == 0: - for logger in self.loggers: - self.include_logger(logger) + self.process_name = multiprocessing.current_process().name + if self.process_name == "MainProcess": + self.queue = multiprocessing.Queue() + self.setup_queue_handler() + # Start the QueueListener + self.listener = logging.handlers.QueueListener(self.queue, *self.log_handlers.values()) + self.listener.start() + atexit.register(self.listener.stop) self.log_level = logging.INFO + def setup_queue_handler(self, logging_queue=None): + if logging_queue is None: + logging_queue = self.queue + else: + self.queue = logging_queue + self.queue_handler = logging.handlers.QueueHandler(logging_queue) + logging.getLogger().addHandler(self.queue_handler) + self.core_logger.setLevel(self.log_level) + # disable asyncio logging for child processes + if self.process_name != "MainProcess": + logging.getLogger("asyncio").setLevel(logging.ERROR) + def addLoggingLevel(self, levelName, levelNum, methodName=None): """ Comprehensively adds a new logging level to the `logging` module and the @@ -127,15 +145,19 @@ def loggers(self): return self._loggers def add_log_handler(self, handler, formatter=None): + if self.listener is None: + return if handler.formatter is None: 
handler.setFormatter(debug_format) for logger in self.loggers: - if handler not in logger.handlers: + if handler not in self.listener.handlers: logger.addHandler(handler) def remove_log_handler(self, handler): + if self.listener is None: + return for logger in self.loggers: - if handler in logger.handlers: + if handler in self.listener.handlers: logger.removeHandler(handler) def include_logger(self, logger): @@ -144,7 +166,7 @@ def include_logger(self, logger): if self.log_level is not None: logger.setLevel(self.log_level) for handler in self.log_handlers.values(): - logger.addHandler(handler) + self.add_log_handler(handler) @property def log_handlers(self): diff --git a/bbot/core/core.py b/bbot/core/core.py index 313aa236a3..a383077635 100644 --- a/bbot/core/core.py +++ b/bbot/core/core.py @@ -1,5 +1,7 @@ import logging +import traceback from copy import copy +import multiprocessing from pathlib import Path from omegaconf import OmegaConf @@ -20,6 +22,26 @@ class BBOTCore: - load quickly """ + class BBOTProcess(multiprocessing.Process): + + def __init__(self, *args, **kwargs): + self.logging_queue = kwargs.pop("logging_queue") + super().__init__(*args, **kwargs) + + def run(self): + log = logging.getLogger("bbot.core.process") + try: + from bbot.core import CORE + + CORE.logger.setup_queue_handler(self.logging_queue) + super().run() + except KeyboardInterrupt: + log.warning(f"Got KeyboardInterrupt in {self.name}") + log.trace(traceback.format_exc()) + except BaseException as e: + log.warning(f"Error in {self.name}: {e}") + log.trace(traceback.format_exc()) + def __init__(self): self._logger = None self._files_config = None @@ -142,6 +164,11 @@ def files_config(self): self._files_config = files.BBOTConfigFiles(self) return self._files_config + def create_process(self, *args, **kwargs): + process = self.BBOTProcess(*args, logging_queue=self.logger.queue, **kwargs) + process.daemon = True + return process + @property def logger(self): self.config diff --git 
a/bbot/core/engine.py b/bbot/core/engine.py new file mode 100644 index 0000000000..5f4530ba17 --- /dev/null +++ b/bbot/core/engine.py @@ -0,0 +1,189 @@ +import zmq +import atexit +import pickle +import asyncio +import inspect +import logging +import tempfile +import traceback +import zmq.asyncio +from pathlib import Path +from contextlib import contextmanager + +from bbot.core import CORE +from bbot.core.helpers.misc import rand_string + +CMD_EXIT = 1000 + + +class EngineClient: + + SERVER_CLASS = None + + def __init__(self, **kwargs): + self.name = f"EngineClient {self.__class__.__name__}" + if self.SERVER_CLASS is None: + raise ValueError(f"Must set EngineClient SERVER_CLASS, {self.SERVER_CLASS}") + self.CMDS = dict(self.SERVER_CLASS.CMDS) + for k, v in list(self.CMDS.items()): + self.CMDS[v] = k + self.log = logging.getLogger(f"bbot.core.{self.__class__.__name__.lower()}") + self.socket_address = f"zmq_{rand_string(8)}.sock" + self.socket_path = Path(tempfile.gettempdir()) / self.socket_address + self.server_kwargs = kwargs.pop("server_kwargs", {}) + self.server_process = self.start_server() + self.context = zmq.asyncio.Context() + + async def run_and_return(self, command, **kwargs): + with self.new_socket() as socket: + message = self.make_message(command, args=kwargs) + await socket.send(message) + binary = await socket.recv() + self.log.debug(f"{self.name}.{command}({kwargs}) got binary: {binary}") + message = pickle.loads(binary) + self.log.debug(f"{self.name}.{command}({kwargs}) got message: {message}") + # error handling + if self.check_error(message): + return + return message + + async def run_and_yield(self, command, **kwargs): + message = self.make_message(command, args=kwargs) + with self.new_socket() as socket: + await socket.send(message) + while 1: + binary = await socket.recv() + self.log.debug(f"{self.name}.{command}({kwargs}) got binary: {binary}") + message = pickle.loads(binary) + self.log.debug(f"{self.name}.{command}({kwargs}) got message: 
{message}") + # error handling + if self.check_error(message) or self.check_stop(message): + break + yield message + + def check_error(self, message): + if isinstance(message, dict) and len(message) == 1 and "_e" in message: + error, trace = message["_e"] + self.log.error(error) + self.log.trace(trace) + return True + return False + + def check_stop(self, message): + if isinstance(message, dict) and len(message) == 1 and "_s" in message: + return True + return False + + def make_message(self, command, args): + try: + cmd_id = self.CMDS[command] + except KeyError: + raise KeyError(f'Command "{command}" not found. Available commands: {",".join(self.available_commands)}') + return pickle.dumps(dict(c=cmd_id, a=args)) + + @property + def available_commands(self): + return [s for s in self.CMDS if isinstance(s, str)] + + def start_server(self, **server_kwargs): + process = CORE.create_process( + target=self.server_process, + args=( + self.SERVER_CLASS, + self.socket_path, + ), + kwargs=self.server_kwargs, + ) + process.start() + return process + + @staticmethod + def server_process(server_class, socket_path, **kwargs): + engine_server = server_class(socket_path, **kwargs) + asyncio.run(engine_server.worker()) + + @contextmanager + def new_socket(self): + socket = self.context.socket(zmq.DEALER) + socket.connect(f"ipc://{self.socket_path}") + try: + yield socket + finally: + socket.close() + + +class EngineServer: + + CMDS = {} + + def __init__(self, socket_path): + self.log = logging.getLogger(f"bbot.core.{self.__class__.__name__.lower()}") + self.name = f"EngineServer {self.__class__.__name__}" + if socket_path is not None: + # create ZeroMQ context + self.context = zmq.asyncio.Context() + # ROUTER socket can handle multiple concurrent requests + self.socket = self.context.socket(zmq.ROUTER) + # create socket file + self.socket.bind(f"ipc://{socket_path}") + # delete socket file on exit + atexit.register(socket_path.unlink, missing_ok=True) + + async def 
run_and_return(self, client_id, command_fn, **kwargs): + self.log.debug(f"{self.name} run-and-return {command_fn.__name__}({kwargs})") + try: + result = await command_fn(**kwargs) + except Exception as e: + error = f"Unhandled error in {self.name}.{command_fn.__name__}({kwargs}): {e}" + trace = traceback.format_exc() + result = {"_e": (error, trace)} + await self.socket.send_multipart([client_id, pickle.dumps(result)]) + + async def run_and_yield(self, client_id, command_fn, **kwargs): + self.log.debug(f"{self.name} run-and-yield {command_fn.__name__}({kwargs})") + try: + async for _ in command_fn(**kwargs): + await self.socket.send_multipart([client_id, pickle.dumps(_)]) + await self.socket.send_multipart([client_id, pickle.dumps({"_s": None})]) + except Exception as e: + error = f"Unhandled error in {self.name}.{command_fn.__name__}({kwargs}): {e}" + trace = traceback.format_exc() + result = {"_e": (error, trace)} + await self.socket.send_multipart([client_id, pickle.dumps(result)]) + + async def worker(self): + try: + while 1: + client_id, binary = await self.socket.recv_multipart() + self.log.debug(f"{self.name} got binary: {binary}") + message = pickle.loads(binary) + self.log.debug(f"{self.name} got message: {message}") + + cmd = message.get("c", None) + if not isinstance(cmd, int): + self.log.warning(f"No command sent in message: {message}") + continue + + kwargs = message.get("a", {}) + if not isinstance(kwargs, dict): + self.log.warning(f"{self.name}: received invalid message of type {type(kwargs)}, should be dict") + continue + + command_name = self.CMDS[cmd] + command_fn = getattr(self, command_name, None) + + if command_fn is None: + self.log.warning(f'{self.name} has no function named "{command_fn}"') + continue + + if inspect.isasyncgenfunction(command_fn): + coroutine = self.run_and_yield(client_id, command_fn, **kwargs) + else: + coroutine = self.run_and_return(client_id, command_fn, **kwargs) + + asyncio.create_task(coroutine) + except Exception as 
e: + self.log.error(f"Error in EngineServer worker: {e}") + self.log.trace(traceback.format_exc()) + finally: + self.socket.close() diff --git a/bbot/core/helpers/dns/__init__.py b/bbot/core/helpers/dns/__init__.py new file mode 100644 index 0000000000..75426cd265 --- /dev/null +++ b/bbot/core/helpers/dns/__init__.py @@ -0,0 +1 @@ +from .dns import DNSHelper diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py new file mode 100644 index 0000000000..1a9474277d --- /dev/null +++ b/bbot/core/helpers/dns/dns.py @@ -0,0 +1,234 @@ +import dns +import logging +import dns.exception +import dns.asyncresolver + +from bbot.core.engine import EngineClient +from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host + +from .engine import DNSEngine + +log = logging.getLogger("bbot.core.helpers.dns") + + +class DNSHelper(EngineClient): + + SERVER_CLASS = DNSEngine + + """Helper class for DNS-related operations within BBOT. + + This class provides mechanisms for host resolution, wildcard domain detection, event tagging, and more. + It centralizes all DNS-related activities in BBOT, offering both synchronous and asynchronous methods + for DNS resolution, as well as various utilities for batch resolution and DNS query filtering. + + Attributes: + parent_helper: A reference to the instantiated `ConfigAwareHelper` (typically `scan.helpers`). + resolver (BBOTAsyncResolver): An asynchronous DNS resolver tailored for BBOT with rate-limiting capabilities. + timeout (int): The timeout value for DNS queries. Defaults to 5 seconds. + retries (int): The number of retries for failed DNS queries. Defaults to 1. + abort_threshold (int): The threshold for aborting after consecutive failed queries. Defaults to 50. + max_dns_resolve_distance (int): Maximum allowed distance for DNS resolution. Defaults to 4. + all_rdtypes (list): A list of DNS record types to be considered during operations. 
+ wildcard_ignore (tuple): Domains to be ignored during wildcard detection. + wildcard_tests (int): Number of tests to be run for wildcard detection. Defaults to 5. + _wildcard_cache (dict): Cache for wildcard detection results. + _dns_cache (LRUCache): Cache for DNS resolution results, limited in size. + _event_cache (LRUCache): Cache for event resolution results, tags. Limited in size. + resolver_file (Path): File containing system's current resolver nameservers. + filter_bad_ptrs (bool): Whether to filter out DNS names that appear to be auto-generated PTR records. Defaults to True. + + Args: + parent_helper: The parent helper object with configuration details and utilities. + + Raises: + DNSError: If an issue arises when creating the BBOTAsyncResolver instance. + + Examples: + >>> dns_helper = DNSHelper(parent_config) + >>> resolved_host = dns_helper.resolver.resolve("example.com") + """ + + def __init__(self, parent_helper): + self.parent_helper = parent_helper + self.config = self.parent_helper.config + super().__init__(server_kwargs={"config": self.config}) + + # resolver + self.timeout = self.config.get("dns_timeout", 5) + self.resolver = dns.asyncresolver.Resolver() + self.resolver.rotate = True + self.resolver.timeout = self.timeout + self.resolver.lifetime = self.timeout + + self.max_dns_resolve_distance = self.config.get("max_dns_resolve_distance", 5) + + # wildcard handling + self.wildcard_ignore = self.config.get("dns_wildcard_ignore", None) + if not self.wildcard_ignore: + self.wildcard_ignore = [] + self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore]) + + # copy the system's current resolvers to a text file for tool use + self.system_resolvers = dns.resolver.Resolver().nameservers + # TODO: DNS server speed test (start in background task) + self.resolver_file = self.parent_helper.tempfile(self.system_resolvers, pipe=False) + + async def resolve(self, query, **kwargs): + return await self.run_and_return("resolve", 
query=query, **kwargs) + + async def resolve_batch(self, queries, **kwargs): + async for _ in self.run_and_yield("resolve_batch", queries=queries, **kwargs): + yield _ + + async def resolve_custom_batch(self, queries): + async for _ in self.run_and_yield("resolve_custom_batch", queries=queries): + yield _ + + async def resolve_event(self, event, minimal=False): + # abort if the event doesn't have a host + if (not event.host) or (event.type in ("IP_RANGE",)): + # tags, whitelisted, blacklisted, children + return set(), False, False, dict() + + event_host = str(event.host) + event_type = str(event.type) + kwargs = {"event_host": event_host, "event_type": event_type, "minimal": minimal} + return await self.run_and_return("resolve_event", **kwargs) + + async def is_wildcard(self, query, ips=None, rdtype=None): + """ + Use this method to check whether a *host* is a wildcard entry + + This can reliably tell the difference between a valid DNS record and a wildcard within a wildcard domain. + + If you want to know whether a domain is using wildcard DNS, use `is_wildcard_domain()` instead. + + Args: + query (str): The hostname to check for a wildcard entry. + ips (list, optional): List of IPs to compare against, typically obtained from a previous DNS resolution of the query. + rdtype (str, optional): The DNS record type (e.g., "A", "AAAA") to consider during the check. + + Returns: + dict: A dictionary indicating if the query is a wildcard for each checked DNS record type. + Keys are DNS record types like "A", "AAAA", etc. + Values are tuples where the first element is a boolean indicating if the query is a wildcard, + and the second element is the wildcard parent if it's a wildcard. + + Raises: + ValueError: If only one of `ips` or `rdtype` is specified or if no valid IPs are specified. 
+ + Examples: + >>> is_wildcard("www.github.io") + {"A": (True, "github.io"), "AAAA": (True, "github.io")} + + >>> is_wildcard("www.evilcorp.com", ips=["93.184.216.34"], rdtype="A") + {"A": (False, "evilcorp.com")} + + Note: + `is_wildcard` can be True, False, or None (indicating that wildcard detection was inconclusive) + """ + if [ips, rdtype].count(None) == 1: + raise ValueError("Both ips and rdtype must be specified") + + # skip if query isn't a dns name + if not is_dns_name(query): + return {} + + # skip check if the query's parent domain is excluded in the config + for d in self.wildcard_ignore: + if host_in_host(query, d): + log.debug(f"Skipping wildcard detection on {query} because it is excluded in the config") + return {} + + query = clean_dns_record(query) + # skip check if it's an IP or a plain hostname + if is_ip(query) or not "." in query: + return {} + # skip check if the query is a domain + if is_domain(query): + return {} + + return await self.run_and_return("is_wildcard", query=query, ips=ips, rdtype=rdtype) + + async def is_wildcard_domain(self, domain, log_info=False): + return await self.run_and_return("is_wildcard_domain", domain=domain, log_info=False) + + async def handle_wildcard_event(self, event, children): + """ + Used within BBOT's scan manager to detect and tag DNS wildcard events. + + Wildcards are detected for every major record type. If a wildcard is detected, its data + is overwritten, for example: `_wildcard.evilcorp.com`. + + Args: + event (object): The event to check for wildcards. + children (list): A list of the event's resulting DNS children after resolution. + + Returns: + None: This method modifies the `event` in place and does not return a value. + + Examples: + >>> handle_wildcard_event(event, children) + # The `event` might now have tags like ["wildcard", "a-wildcard", "aaaa-wildcard"] and + # its `data` attribute might be modified to "_wildcard.evilcorp.com" if it was detected + # as a wildcard. 
+ """ + log.debug(f"Entering handle_wildcard_event({event}, children={children})") + try: + event_host = str(event.host) + # wildcard checks + if not is_ip(event.host): + # check if the dns name itself is a wildcard entry + wildcard_rdtypes = await self.is_wildcard(event_host) + for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): + wildcard_tag = "error" + if is_wildcard == True: + event.add_tag("wildcard") + wildcard_tag = "wildcard" + event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") + + # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) + if not is_ip(event.host) and children: + if wildcard_rdtypes: + # these are the rdtypes that successfully resolve + resolved_rdtypes = set([c.upper() for c in children]) + # these are the rdtypes that have wildcards + wildcard_rdtypes_set = set(wildcard_rdtypes) + # consider the event a full wildcard if all its records are wildcards + event_is_wildcard = False + if resolved_rdtypes: + event_is_wildcard = all(r in wildcard_rdtypes_set for r in resolved_rdtypes) + + if event_is_wildcard: + if event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): + wildcard_parent = self.parent_helper.parent_domain(event_host) + for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items(): + if _is_wildcard: + wildcard_parent = _parent_domain + break + wildcard_data = f"_wildcard.{wildcard_parent}" + if wildcard_data != event.data: + log.debug( + f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"' + ) + event.data = wildcard_data + + # TODO: transplant this + # tag wildcard domains for convenience + # elif is_domain(event_host) or hash(event_host) in self._wildcard_cache: + # event_target = "target" in event.tags + # wildcard_domain_results = await self.is_wildcard_domain(event_host, log_info=event_target) + # for hostname, wildcard_domain_rdtypes in wildcard_domain_results.items(): + # if wildcard_domain_rdtypes: + # 
event.add_tag("wildcard-domain") + # for rdtype, ips in wildcard_domain_rdtypes.items(): + # event.add_tag(f"{rdtype.lower()}-wildcard-domain") + + finally: + log.debug(f"Finished handle_wildcard_event({event}, children={children})") + + async def _mock_dns(self, mock_data): + from .mock import MockResolver + + self.resolver = MockResolver(mock_data) + await self.run_and_return("_mock_dns", mock_data=mock_data) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns/engine.py similarity index 78% rename from bbot/core/helpers/dns.py rename to bbot/core/helpers/dns/engine.py index 1ae8f0dedd..54a60b3280 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns/engine.py @@ -1,103 +1,68 @@ +import os import dns import time import asyncio import logging import ipaddress import traceback -import contextlib -import dns.exception -import dns.asyncresolver -from cachetools import LRUCache from contextlib import suppress +from cachetools import LRUCache -from .regexes import dns_name_regex -from bbot.core.helpers.ratelimiter import RateLimiter +from ..regexes import dns_name_regex +from bbot.core.engine import EngineServer +from bbot.core.errors import DNSWildcardBreak from bbot.core.helpers.async_helpers import NamedLock -from bbot.core.errors import ValidationError, DNSError, DNSWildcardBreak -from .misc import is_ip, is_domain, is_dns_name, domain_parents, parent_domain, rand_string, cloudcheck - -log = logging.getLogger("bbot.core.helpers.dns") - - -class BBOTAsyncResolver(dns.asyncresolver.Resolver): - """Custom asynchronous resolver for BBOT with rate limiting. - - This class extends dnspython's async resolver and provides additional support for rate-limiting DNS queries. - The maximum number of queries allowed per second can be customized via BBOT's config. - - Attributes: - _parent_helper: A reference to the instantiated `ConfigAwareHelper` (typically `scan.helpers`). 
- _dns_rate_limiter (RateLimiter): An instance of the RateLimiter class for DNS query rate-limiting. - - Args: - *args: Positional arguments passed to the base resolver. - **kwargs: Keyword arguments. '_parent_helper' is expected among these to provide configuration data for - rate-limiting. All other keyword arguments are passed to the base resolver. - """ - - def __init__(self, *args, **kwargs): - self._parent_helper = kwargs.pop("_parent_helper") - dns_queries_per_second = self._parent_helper.config.get("dns_queries_per_second", 100) - self._dns_rate_limiter = RateLimiter(dns_queries_per_second, "DNS") - super().__init__(*args, **kwargs) - self.rotate = True - - async def resolve(self, *args, **kwargs): - async with self._dns_rate_limiter: - return await super().resolve(*args, **kwargs) - - -class DNSHelper: - """Helper class for DNS-related operations within BBOT. - - This class provides mechanisms for host resolution, wildcard domain detection, event tagging, and more. - It centralizes all DNS-related activities in BBOT, offering both synchronous and asynchronous methods - for DNS resolution, as well as various utilities for batch resolution and DNS query filtering. - - Attributes: - parent_helper: A reference to the instantiated `ConfigAwareHelper` (typically `scan.helpers`). - resolver (BBOTAsyncResolver): An asynchronous DNS resolver tailored for BBOT with rate-limiting capabilities. - timeout (int): The timeout value for DNS queries. Defaults to 5 seconds. - retries (int): The number of retries for failed DNS queries. Defaults to 1. - abort_threshold (int): The threshold for aborting after consecutive failed queries. Defaults to 50. - max_dns_resolve_distance (int): Maximum allowed distance for DNS resolution. Defaults to 4. - all_rdtypes (list): A list of DNS record types to be considered during operations. - wildcard_ignore (tuple): Domains to be ignored during wildcard detection. - wildcard_tests (int): Number of tests to be run for wildcard detection. 
Defaults to 5. - _wildcard_cache (dict): Cache for wildcard detection results. - _dns_cache (LRUCache): Cache for DNS resolution results, limited in size. - _event_cache (LRUCache): Cache for event resolution results, tags. Limited in size. - resolver_file (Path): File containing system's current resolver nameservers. - filter_bad_ptrs (bool): Whether to filter out DNS names that appear to be auto-generated PTR records. Defaults to True. - - Args: - parent_helper: The parent helper object with configuration details and utilities. - - Raises: - DNSError: If an issue arises when creating the BBOTAsyncResolver instance. - - Examples: - >>> dns_helper = DNSHelper(parent_config) - >>> resolved_host = dns_helper.resolver.resolve("example.com") - """ +from bbot.core.helpers.misc import ( + clean_dns_record, + parent_domain, + domain_parents, + is_ip, + is_domain, + is_ptr, + is_dns_name, + host_in_host, + make_ip_type, + smart_decode, + cloudcheck, + rand_string, +) + + +log = logging.getLogger("bbot.core.helpers.dns.engine.server") + + +class DNSEngine(EngineServer): + + CMDS = { + 0: "resolve", + 1: "resolve_event", + 2: "resolve_batch", + 3: "resolve_custom_batch", + 4: "is_wildcard", + 5: "is_wildcard_domain", + 99: "_mock_dns", + } all_rdtypes = ["A", "AAAA", "SRV", "MX", "NS", "SOA", "CNAME", "TXT"] - def __init__(self, parent_helper): - self.parent_helper = parent_helper - try: - self.resolver = BBOTAsyncResolver(_parent_helper=self.parent_helper) - except Exception as e: - raise DNSError(f"Failed to create BBOT DNS resolver: {e}") - self.timeout = self.parent_helper.config.get("dns_timeout", 5) - self.retries = self.parent_helper.config.get("dns_retries", 1) - self.abort_threshold = self.parent_helper.config.get("dns_abort_threshold", 50) - self.max_dns_resolve_distance = self.parent_helper.config.get("max_dns_resolve_distance", 5) + def __init__(self, socket_path, config={}): + super().__init__(socket_path) + + self.config = config + # config values + 
self.timeout = self.config.get("dns_timeout", 5) + self.retries = self.config.get("dns_retries", 1) + self.abort_threshold = self.config.get("dns_abort_threshold", 50) + self.max_dns_resolve_distance = self.config.get("max_dns_resolve_distance", 5) + + # resolver + self.resolver = dns.asyncresolver.Resolver() + self.resolver.rotate = True self.resolver.timeout = self.timeout self.resolver.lifetime = self.timeout # skip certain queries - dns_omit_queries = self.parent_helper.config.get("dns_omit_queries", None) + dns_omit_queries = self.config.get("dns_omit_queries", None) if not dns_omit_queries: dns_omit_queries = [] self.dns_omit_queries = dict() @@ -112,36 +77,31 @@ def __init__(self, parent_helper): except KeyError: self.dns_omit_queries[rdtype] = {query} - self.wildcard_ignore = self.parent_helper.config.get("dns_wildcard_ignore", None) + # wildcard handling + self.wildcard_ignore = self.config.get("dns_wildcard_ignore", None) if not self.wildcard_ignore: self.wildcard_ignore = [] self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore]) - self.wildcard_tests = self.parent_helper.config.get("dns_wildcard_tests", 5) + self.wildcard_tests = self.config.get("dns_wildcard_tests", 5) self._wildcard_cache = dict() # since wildcard detection takes some time, This is to prevent multiple # modules from kicking off wildcard detection for the same domain at the same time self._wildcard_lock = NamedLock() + self._dns_connectivity_lock = asyncio.Lock() self._last_dns_success = None self._last_connectivity_warning = time.time() # keeps track of warnings issued for wildcard detection to prevent duplicate warnings self._dns_warnings = set() self._errors = dict() - self.fallback_nameservers_file = self.parent_helper.wordlist_dir / "nameservers.txt" - self._debug = self.parent_helper.config.get("dns_debug", False) - self._dummy_modules = dict() + self._debug = self.config.get("dns_debug", False) self._dns_cache = LRUCache(maxsize=10000) 
self._event_cache = LRUCache(maxsize=10000) self._event_cache_locks = NamedLock() - # copy the system's current resolvers to a text file for tool use - self.system_resolvers = dns.resolver.Resolver().nameservers - # TODO: DNS server speed test (start in background task) - self.resolver_file = self.parent_helper.tempfile(self.system_resolvers, pipe=False) - - self.filter_bad_ptrs = self.parent_helper.config.get("dns_filter_ptrs", True) + self.filter_bad_ptrs = self.config.get("dns_filter_ptrs", True) - async def resolve(self, query, **kwargs): + async def resolve(self, query, include_errors=False, **kwargs): """Resolve DNS names and IP addresses to their corresponding results. This is a high-level function that can translate a given domain name to its associated IP addresses @@ -163,6 +123,7 @@ async def resolve(self, query, **kwargs): {"1.2.3.4", "dead::beef"} """ results = set() + errors = [] try: r = await self.resolve_raw(query, **kwargs) if r: @@ -177,7 +138,10 @@ async def resolve(self, query, **kwargs): raise self.debug(f"Results for {query} with kwargs={kwargs}: {results}") - return results + if include_errors: + return results, errors + else: + return results async def resolve_raw(self, query, **kwargs): """Resolves the given query to its associated DNS records. 
@@ -272,7 +236,7 @@ async def _resolve_hostname(self, query, **kwargs): self.debug(f"Skipping {rdtype}:{query} because it's omitted in the config") return results, errors - parent = self.parent_helper.parent_domain(query) + parent = parent_domain(query) retries = kwargs.pop("retries", self.retries) use_cache = kwargs.pop("use_cache", True) tries_left = int(retries) + 1 @@ -419,8 +383,9 @@ async def handle_wildcard_event(self, event, children): log.debug(f"Entering handle_wildcard_event({event}, children={children})") try: event_host = str(event.host) + event_is_ip = is_ip(event_host) # wildcard checks - if not is_ip(event.host): + if not event_is_ip: # check if the dns name itself is a wildcard entry wildcard_rdtypes = await self.is_wildcard(event_host) for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): @@ -431,7 +396,7 @@ async def handle_wildcard_event(self, event, children): event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) - if not is_ip(event.host) and children: + if not event_is_ip and children: if wildcard_rdtypes: # these are the rdtypes that successfully resolve resolved_rdtypes = set([c.upper() for c in children]) @@ -444,7 +409,7 @@ async def handle_wildcard_event(self, event, children): if event_is_wildcard: if event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): - wildcard_parent = self.parent_helper.parent_domain(event_host) + wildcard_parent = parent_domain(event_host) for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items(): if _is_wildcard: wildcard_parent = _parent_domain @@ -467,7 +432,7 @@ async def handle_wildcard_event(self, event, children): finally: log.debug(f"Finished handle_wildcard_event({event}, children={children})") - async def resolve_event(self, event, minimal=False): + async def resolve_event(self, event_host, event_type, minimal=False): """ Tag the given event with the appropriate DNS record types and 
optionally create child events based on DNS resolutions. @@ -493,17 +458,13 @@ async def resolve_event(self, event, minimal=False): This method does not modify the passed in `event`. Instead, it returns data that can be used to modify or act upon the `event`. """ - log.debug(f"Resolving {event}") - event_host = str(event.host) + log.debug(f"Resolving event {event_type}:{event_host}") event_tags = set() dns_children = dict() event_whitelisted = False event_blacklisted = False try: - if (not event.host) or (event.type in ("IP_RANGE",)): - return event_tags, event_whitelisted, event_blacklisted, dns_children - # lock to ensure resolution of the same host doesn't start while we're working here async with self._event_cache_locks.lock(event_host): # try to get data from cache @@ -515,11 +476,11 @@ async def resolve_event(self, event, minimal=False): # then resolve types = () - if self.parent_helper.is_ip(event.host): + if is_ip(event_host): if not minimal: types = ("PTR",) else: - if event.type == "DNS_NAME" and not minimal: + if event_type == "DNS_NAME" and not minimal: types = self.all_rdtypes else: types = ("A", "AAAA") @@ -540,18 +501,19 @@ async def resolve_event(self, event, minimal=False): for r in records: for _, t in self.extract_targets(r): if t: - ip = self.parent_helper.make_ip_type(t) - - if rdtype in ("A", "AAAA", "CNAME"): - with contextlib.suppress(ValidationError): - if self.parent_helper.is_ip(ip): - if self.parent_helper.preset.whitelisted(ip): - event_whitelisted = True - with contextlib.suppress(ValidationError): - if self.parent_helper.preset.blacklisted(ip): - event_blacklisted = True - - if self.filter_bad_ptrs and rdtype in ("PTR") and self.parent_helper.is_ptr(t): + ip = make_ip_type(t) + + # TODO: transplant this + # if rdtype in ("A", "AAAA", "CNAME"): + # with contextlib.suppress(ValidationError): + # if self.parent_helper.is_ip(ip): + # if self.parent_helper.preset.whitelisted(ip): + # event_whitelisted = True + # with 
contextlib.suppress(ValidationError): + # if self.parent_helper.preset.blacklisted(ip): + # event_blacklisted = True + + if self.filter_bad_ptrs and rdtype in ("PTR") and is_ptr(t): self.debug(f"Filtering out bad PTR: {t}") continue @@ -561,10 +523,10 @@ async def resolve_event(self, event, minimal=False): dns_children[rdtype] = {ip} # tag with cloud providers - if not self.parent_helper.in_tests: + if not self.in_tests: to_check = set() - if event.type == "IP_ADDRESS": - to_check.add(event.data) + if event_type == "IP_ADDRESS": + to_check.add(event_host) for rdtype, ips in dns_children.items(): if rdtype in ("A", "AAAA"): for ip in ips: @@ -592,7 +554,7 @@ async def resolve_event(self, event, minimal=False): return event_tags, event_whitelisted, event_blacklisted, dns_children finally: - log.debug(f"Finished resolving {event}") + log.debug(f"Finished resolving event {event_type}:{event_host}") def event_cache_get(self, host): """ @@ -648,6 +610,11 @@ async def resolve_batch(self, queries, **kwargs): for q in queries: yield (q, await self.resolve(q, **kwargs)) + async def resolve_custom_batch(self, queries): + for query, rdtype in queries: + answers, errors = await self.resolve(query, type=rdtype, include_errors=True) + yield ((query, rdtype), (answers, errors)) + def extract_targets(self, record): """ Extracts hostnames or IP addresses from a given DNS record. 
@@ -676,53 +643,26 @@ def extract_targets(self, record): results = set() rdtype = str(record.rdtype.name).upper() if rdtype in ("A", "AAAA", "NS", "CNAME", "PTR"): - results.add((rdtype, self._clean_dns_record(record))) + results.add((rdtype, clean_dns_record(record))) elif rdtype == "SOA": - results.add((rdtype, self._clean_dns_record(record.mname))) + results.add((rdtype, clean_dns_record(record.mname))) elif rdtype == "MX": - results.add((rdtype, self._clean_dns_record(record.exchange))) + results.add((rdtype, clean_dns_record(record.exchange))) elif rdtype == "SRV": - results.add((rdtype, self._clean_dns_record(record.target))) + results.add((rdtype, clean_dns_record(record.target))) elif rdtype == "TXT": for s in record.strings: - s = self.parent_helper.smart_decode(s) + s = smart_decode(s) for match in dns_name_regex.finditer(s): start, end = match.span() host = s[start:end] results.add((rdtype, host)) elif rdtype == "NSEC": - results.add((rdtype, self._clean_dns_record(record.next))) + results.add((rdtype, clean_dns_record(record.next))) else: log.warning(f'Unknown DNS record type "{rdtype}"') return results - @staticmethod - def _clean_dns_record(record): - """ - Cleans and formats a given DNS record for further processing. - - This static method converts the DNS record to text format if it's not already a string. - It also removes any trailing dots and converts the record to lowercase. - - Args: - record (str or dns.rdata.Rdata): The DNS record to clean. - - Returns: - str: The cleaned and formatted DNS record. 
- - Examples: - >>> _clean_dns_record('www.evilcorp.com.') - 'www.evilcorp.com' - - >>> from dns.rrset import from_text - >>> record = from_text('www.evilcorp.com', 3600, 'IN', 'A', '1.2.3.4')[0] - >>> _clean_dns_record(record) - '1.2.3.4' - """ - if not isinstance(record, str): - record = str(record.to_text()) - return str(record).rstrip(".").lower() - async def _catch(self, callback, *args, **kwargs): """ Asynchronously catches exceptions thrown during DNS resolution and logs them. @@ -790,55 +730,31 @@ async def is_wildcard(self, query, ips=None, rdtype=None): """ result = {} - if [ips, rdtype].count(None) == 1: - raise ValueError("Both ips and rdtype must be specified") - - if not is_dns_name(query): - return {} - - # skip check if the query's parent domain is excluded in the config - for d in self.wildcard_ignore: - if self.parent_helper.host_in_host(query, d): - log.debug(f"Skipping wildcard detection on {query} because it is excluded in the config") - return {} - - query = self._clean_dns_record(query) - # skip check if it's an IP - if is_ip(query) or not "." 
in query: - return {} - # skip check if the query is a domain - if is_domain(query): - return {} - parent = parent_domain(query) parents = list(domain_parents(query)) rdtypes_to_check = [rdtype] if rdtype is not None else self.all_rdtypes - base_query_ips = dict() + query_baseline = dict() # if the caller hasn't already done the work of resolving the IPs if ips is None: # then resolve the query for all rdtypes - for t in rdtypes_to_check: - raw_results, errors = await self.resolve_raw(query, type=t, use_cache=True) - if errors and not raw_results: - self.debug(f"Failed to resolve {query} ({t}) during wildcard detection") - result[t] = (None, parent) - continue - for __rdtype, answers in raw_results: - base_query_results = set() - for answer in answers: - for _, t in self.extract_targets(answer): - base_query_results.add(t) - if base_query_results: - base_query_ips[__rdtype] = base_query_results + queries = [(query, t) for t in rdtypes_to_check] + async for (query, _rdtype), (answers, errors) in self.resolve_custom_batch(queries): + if answers: + query_baseline[_rdtype] = answers + else: + if errors: + self.debug(f"Failed to resolve {query} ({_rdtype}) during wildcard detection") + result[_rdtype] = (None, parent) + continue else: # otherwise, we can skip all that - cleaned_ips = set([self._clean_dns_record(ip) for ip in ips]) + cleaned_ips = set([clean_dns_record(ip) for ip in ips]) if not cleaned_ips: raise ValueError("Valid IPs must be specified") - base_query_ips[rdtype] = cleaned_ips - if not base_query_ips: + query_baseline[rdtype] = cleaned_ips + if not query_baseline: return result # once we've resolved the base query and have IP addresses to work with @@ -851,9 +767,9 @@ async def is_wildcard(self, query, ips=None, rdtype=None): await self.is_wildcard_domain(host) # for every rdtype - for _rdtype in list(base_query_ips): + for _rdtype in list(query_baseline): # get the IPs from above - query_ips = base_query_ips.get(_rdtype, set()) + query_ips = 
query_baseline.get(_rdtype, set()) host_hash = hash(host) if host_hash in self._wildcard_cache: @@ -870,13 +786,14 @@ async def is_wildcard(self, query, ips=None, rdtype=None): result[_rdtype] = (True, host) # if we've reached a point where the dns name is a complete wildcard, class can be dismissed early - base_query_rdtypes = set(base_query_ips) + base_query_rdtypes = set(query_baseline) wildcard_rdtypes_set = set([k for k, v in result.items() if v[0] is True]) if base_query_rdtypes and wildcard_rdtypes_set and base_query_rdtypes == wildcard_rdtypes_set: log.debug( f"Breaking from wildcard detection for {query} at {host} because base query rdtypes ({base_query_rdtypes}) == wildcard rdtypes ({wildcard_rdtypes_set})" ) raise DNSWildcardBreak() + except DNSWildcardBreak: pass @@ -904,14 +821,14 @@ async def is_wildcard_domain(self, domain, log_info=False): {} """ wildcard_domain_results = {} - domain = self._clean_dns_record(domain) + domain = clean_dns_record(domain) if not is_dns_name(domain): return {} # skip check if the query's parent domain is excluded in the config for d in self.wildcard_ignore: - if self.parent_helper.host_in_host(domain, d): + if host_in_host(domain, d): log.debug(f"Skipping wildcard detection on {domain} because it is excluded in the config") return {} @@ -1010,3 +927,12 @@ def _parse_rdtype(self, t, default=None): def debug(self, *args, **kwargs): if self._debug: log.trace(*args, **kwargs) + + @property + def in_tests(self): + return os.getenv("BBOT_TESTING", "") == "True" + + async def _mock_dns(self, mock_data): + from .mock import MockResolver + + self.resolver = MockResolver(mock_data) diff --git a/bbot/core/helpers/dns/mock.py b/bbot/core/helpers/dns/mock.py new file mode 100644 index 0000000000..0685a8e80d --- /dev/null +++ b/bbot/core/helpers/dns/mock.py @@ -0,0 +1,55 @@ +import dns + +class MockResolver: + + def __init__(self, mock_data=None): + self.mock_data = mock_data if mock_data else {} + self.nameservers = ["127.0.0.1"] + + 
async def resolve_address(self, ipaddr, *args, **kwargs): + modified_kwargs = {} + modified_kwargs.update(kwargs) + modified_kwargs["rdtype"] = "PTR" + return await self.resolve(str(dns.reversename.from_address(ipaddr)), *args, **modified_kwargs) + + def create_dns_response(self, query_name, rdtype): + query_name = query_name.strip(".") + answers = self.mock_data.get(query_name, {}).get(rdtype, []) + if not answers: + raise self.dns.resolver.NXDOMAIN(f"No answer found for {query_name} {rdtype}") + + message_text = f"""id 1234 +opcode QUERY +rcode NOERROR +flags QR AA RD +;QUESTION +{query_name}. IN {rdtype} +;ANSWER""" + for answer in answers: + message_text += f"\n{query_name}. 1 IN {rdtype} {answer}" + + message_text += "\n;AUTHORITY\n;ADDITIONAL\n" + message = self.dns.message.from_text(message_text) + return message + + async def resolve(self, query_name, rdtype=None): + if rdtype is None: + rdtype = "A" + elif isinstance(rdtype, str): + rdtype = rdtype.upper() + else: + rdtype = str(rdtype.name).upper() + + domain_name = self.dns.name.from_text(query_name) + rdtype_obj = self.dns.rdatatype.from_text(rdtype) + + if "_NXDOMAIN" in self.mock_data and query_name in self.mock_data["_NXDOMAIN"]: + # Simulate the NXDOMAIN exception + raise self.dns.resolver.NXDOMAIN + + try: + response = self.create_dns_response(query_name, rdtype) + answer = self.dns.resolver.Answer(domain_name, rdtype_obj, self.dns.rdataclass.IN, response) + return answer + except self.dns.resolver.NXDOMAIN: + return [] diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index c229b26a1f..94fd261174 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -2622,3 +2622,30 @@ async def as_completed(coros): for task in done: tasks.pop(task) yield task + + +def clean_dns_record(record): + """ + Cleans and formats a given DNS record for further processing. + + This function converts the DNS record to text format if it's not already a string.
+ It also removes any trailing dots and converts the record to lowercase. + + Args: + record (str or dns.rdata.Rdata): The DNS record to clean. + + Returns: + str: The cleaned and formatted DNS record. + + Examples: + >>> clean_dns_record('www.evilcorp.com.') + 'www.evilcorp.com' + + >>> from dns.rrset import from_text + >>> record = from_text('www.evilcorp.com', 3600, 'IN', 'A', '1.2.3.4')[0] + >>> clean_dns_record(record) + '1.2.3.4' + """ + if not isinstance(record, str): + record = str(record.to_text()) + return str(record).rstrip(".").lower() diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index c859d6c421..a849ae6e7e 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -212,10 +212,10 @@ def __init__( self.dispatcher_tasks = [] # multiprocessing thread pool - try: - mp.set_start_method("spawn") - except Exception: - self.warning(f"Failed to set multiprocessing spawn method. This may negatively affect performance.") + start_method = mp.get_start_method() + if start_method != "spawn": + self.warning(f"Multiprocessing spawn method is set to {start_method}.") + # we spawn 1 fewer processes than cores # this helps to avoid locking up the system or competing with the main python process for cpu time num_processes = max(1, mp.cpu_count() - 1) diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index d34cdf7301..29c27fa933 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -1,5 +1,4 @@ import os # noqa -import dns import sys import pytest import asyncio # noqa @@ -32,6 +31,11 @@ available_internal_modules = list(DEFAULT_PRESET.module_loader.configs(type="internal")) +@pytest.fixture(scope="session", autouse=True) +def setup_logging(): + CORE.logger.setup_queue_handler() + + @pytest.fixture def clean_default_config(monkeypatch): clean_config = OmegaConf.merge( @@ -216,67 +220,3 @@ def install_all_python_deps(): for module in DEFAULT_PRESET.module_loader.preloaded().values(): 
deps_pip.update(set(module.get("deps", {}).get("pip", []))) subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip)) - - -class MockResolver: - import dns - - def __init__(self, mock_data=None): - self.mock_data = mock_data if mock_data else {} - self.nameservers = ["127.0.0.1"] - - async def resolve_address(self, ipaddr, *args, **kwargs): - modified_kwargs = {} - modified_kwargs.update(kwargs) - modified_kwargs["rdtype"] = "PTR" - return await self.resolve(str(dns.reversename.from_address(ipaddr)), *args, **modified_kwargs) - - def create_dns_response(self, query_name, rdtype): - query_name = query_name.strip(".") - answers = self.mock_data.get(query_name, {}).get(rdtype, []) - if not answers: - raise self.dns.resolver.NXDOMAIN(f"No answer found for {query_name} {rdtype}") - - message_text = f"""id 1234 -opcode QUERY -rcode NOERROR -flags QR AA RD -;QUESTION -{query_name}. IN {rdtype} -;ANSWER""" - for answer in answers: - message_text += f"\n{query_name}. 1 IN {rdtype} {answer}" - - message_text += "\n;AUTHORITY\n;ADDITIONAL\n" - message = self.dns.message.from_text(message_text) - return message - - async def resolve(self, query_name, rdtype=None): - if rdtype is None: - rdtype = "A" - elif isinstance(rdtype, str): - rdtype = rdtype.upper() - else: - rdtype = str(rdtype.name).upper() - - domain_name = self.dns.name.from_text(query_name) - rdtype_obj = self.dns.rdatatype.from_text(rdtype) - - if "_NXDOMAIN" in self.mock_data and query_name in self.mock_data["_NXDOMAIN"]: - # Simulate the NXDOMAIN exception - raise self.dns.resolver.NXDOMAIN - - try: - response = self.create_dns_response(query_name, rdtype) - answer = self.dns.resolver.Answer(domain_name, rdtype_obj, self.dns.rdataclass.IN, response) - return answer - except self.dns.resolver.NXDOMAIN: - return [] - - -@pytest.fixture() -def mock_dns(): - def _mock_dns(scan, mock_data): - scan.helpers.dns.resolver = MockResolver(mock_data) - - return _mock_dns diff --git 
a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 6fd51800fb..594192568d 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -2,72 +2,110 @@ @pytest.mark.asyncio -async def test_dns(bbot_scanner, mock_dns): +async def test_dns_engine(bbot_scanner): + scan = bbot_scanner() + result = await scan.helpers.resolve("one.one.one.one") + assert "1.1.1.1" in result + assert "2606:4700:4700::1111" in result + + results = [_ async for _ in scan.helpers.resolve_batch(("one.one.one.one", "1.1.1.1"))] + pass_1 = False + pass_2 = False + for query, result in results: + if query == "one.one.one.one" and "1.1.1.1" in result: + pass_1 = True + elif query == "1.1.1.1" and "one.one.one.one" in result: + pass_2 = True + assert pass_1 and pass_2 + + results = [_ async for _ in scan.helpers.resolve_custom_batch((("one.one.one.one", "A"), ("1.1.1.1", "PTR")))] + pass_1 = False + pass_2 = False + for (query, rdtype), (result, errors) in results: + if query == "one.one.one.one" and "1.1.1.1" in result: + pass_1 = True + elif query == "1.1.1.1" and "one.one.one.one" in result: + pass_2 = True + assert pass_1 and pass_2 + + +@pytest.mark.asyncio +async def test_dns(bbot_scanner): scan = bbot_scanner("1.1.1.1") - helpers = scan.helpers + + from bbot.core.helpers.dns.engine import DNSEngine + + dnsengine = DNSEngine(None) # lowest level functions - a_responses = await helpers._resolve_hostname("one.one.one.one") - aaaa_responses = await helpers._resolve_hostname("one.one.one.one", rdtype="AAAA") - ip_responses = await helpers._resolve_ip("1.1.1.1") + a_responses = await dnsengine._resolve_hostname("one.one.one.one") + aaaa_responses = await dnsengine._resolve_hostname("one.one.one.one", rdtype="AAAA") + ip_responses = await dnsengine._resolve_ip("1.1.1.1") assert a_responses[0].response.answer[0][0].address in ("1.1.1.1", "1.0.0.1") assert aaaa_responses[0].response.answer[0][0].address in ("2606:4700:4700::1111", 
"2606:4700:4700::1001") assert ip_responses[0].response.answer[0][0].target.to_text() in ("one.one.one.one.",) # mid level functions - _responses, errors = await helpers.resolve_raw("one.one.one.one") + _responses, errors = await dnsengine.resolve_raw("one.one.one.one") responses = [] for rdtype, response in _responses: for answers in response: - responses += list(helpers.extract_targets(answers)) + responses += list(dnsengine.extract_targets(answers)) assert ("A", "1.1.1.1") in responses - _responses, errors = await helpers.resolve_raw("one.one.one.one", rdtype="AAAA") + _responses, errors = await dnsengine.resolve_raw("one.one.one.one", rdtype="AAAA") responses = [] for rdtype, response in _responses: for answers in response: - responses += list(helpers.extract_targets(answers)) + responses += list(dnsengine.extract_targets(answers)) assert ("AAAA", "2606:4700:4700::1111") in responses - _responses, errors = await helpers.resolve_raw("1.1.1.1") + _responses, errors = await dnsengine.resolve_raw("1.1.1.1") responses = [] for rdtype, response in _responses: for answers in response: - responses += list(helpers.extract_targets(answers)) + responses += list(dnsengine.extract_targets(answers)) assert ("PTR", "one.one.one.one") in responses # high level functions - assert "1.1.1.1" in await helpers.resolve("one.one.one.one") - assert "2606:4700:4700::1111" in await helpers.resolve("one.one.one.one", type="AAAA") - assert "one.one.one.one" in await helpers.resolve("1.1.1.1") + assert "1.1.1.1" in await dnsengine.resolve("one.one.one.one") + assert "2606:4700:4700::1111" in await dnsengine.resolve("one.one.one.one", type="AAAA") + assert "one.one.one.one" in await dnsengine.resolve("1.1.1.1") for rdtype in ("NS", "SOA", "MX", "TXT"): - assert len(await helpers.resolve("google.com", type=rdtype)) > 0 + assert len(await dnsengine.resolve("google.com", type=rdtype)) > 0 # batch resolution - batch_results = [r async for r in helpers.resolve_batch(["1.1.1.1", 
"one.one.one.one"])] + batch_results = [r async for r in dnsengine.resolve_batch(["1.1.1.1", "one.one.one.one"])] assert len(batch_results) == 2 batch_results = dict(batch_results) assert any([x in batch_results["one.one.one.one"] for x in ("1.1.1.1", "1.0.0.1")]) assert "one.one.one.one" in batch_results["1.1.1.1"] + # custom batch resolution + batch_results = [r async for r in dnsengine.resolve_custom_batch([("1.1.1.1", "PTR"), ("one.one.one.one", "A")])] + assert len(batch_results) == 2 + batch_results = dict(batch_results) + assert any([x in batch_results[("one.one.one.one", "A")][0] for x in ("1.1.1.1", "1.0.0.1")]) + assert "one.one.one.one" in batch_results[("1.1.1.1", "PTR")][0] + # "any" type - resolved = await helpers.resolve("google.com", type="any") - assert any([helpers.is_subdomain(h) for h in resolved]) + resolved = await dnsengine.resolve("google.com", type="any") + assert any([scan.helpers.is_subdomain(h) for h in resolved]) # dns cache - helpers.dns._dns_cache.clear() - assert hash(f"1.1.1.1:PTR") not in helpers.dns._dns_cache - assert hash(f"one.one.one.one:A") not in helpers.dns._dns_cache - assert hash(f"one.one.one.one:AAAA") not in helpers.dns._dns_cache - await helpers.resolve("1.1.1.1", use_cache=False) - await helpers.resolve("one.one.one.one", use_cache=False) - assert hash(f"1.1.1.1:PTR") not in helpers.dns._dns_cache - assert hash(f"one.one.one.one:A") not in helpers.dns._dns_cache - assert hash(f"one.one.one.one:AAAA") not in helpers.dns._dns_cache - - await helpers.resolve("1.1.1.1") - assert hash(f"1.1.1.1:PTR") in helpers.dns._dns_cache - await helpers.resolve("one.one.one.one") - assert hash(f"one.one.one.one:A") in helpers.dns._dns_cache - assert hash(f"one.one.one.one:AAAA") in helpers.dns._dns_cache + dnsengine._dns_cache.clear() + assert hash(f"1.1.1.1:PTR") not in dnsengine._dns_cache + assert hash(f"one.one.one.one:A") not in dnsengine._dns_cache + assert hash(f"one.one.one.one:AAAA") not in dnsengine._dns_cache + await 
dnsengine.resolve("1.1.1.1", use_cache=False) + await dnsengine.resolve("one.one.one.one", use_cache=False) + assert hash(f"1.1.1.1:PTR") not in dnsengine._dns_cache + assert hash(f"one.one.one.one:A") not in dnsengine._dns_cache + assert hash(f"one.one.one.one:AAAA") not in dnsengine._dns_cache + + await dnsengine.resolve("1.1.1.1") + assert hash(f"1.1.1.1:PTR") in dnsengine._dns_cache + await dnsengine.resolve("one.one.one.one") + assert hash(f"one.one.one.one:A") in dnsengine._dns_cache + assert hash(f"one.one.one.one:AAAA") in dnsengine._dns_cache # Ensure events with hosts have resolved_hosts attribute populated resolved_hosts_event1 = scan.make_event("one.one.one.one", "DNS_NAME", dummy=True) @@ -83,8 +121,7 @@ async def test_dns(bbot_scanner, mock_dns): assert set(children1.keys()) == set(children2.keys()) scan2 = bbot_scanner("evilcorp.com", config={"dns_resolution": True}) - mock_dns( - scan2, + await scan2.helpers.dns._mock_dns( { "evilcorp.com": {"TXT": ['"v=spf1 include:cloudprovider.com ~all"']}, "cloudprovider.com": {"A": ["1.2.3.4"]}, @@ -101,33 +138,37 @@ async def test_wildcards(bbot_scanner): scan = bbot_scanner("1.1.1.1") helpers = scan.helpers + from bbot.core.helpers.dns.engine import DNSEngine + + dnsengine = DNSEngine(None) + # wildcards - wildcard_domains = await helpers.is_wildcard_domain("asdf.github.io") - assert hash("github.io") in helpers.dns._wildcard_cache - assert hash("asdf.github.io") in helpers.dns._wildcard_cache + wildcard_domains = await dnsengine.is_wildcard_domain("asdf.github.io") + assert hash("github.io") in dnsengine._wildcard_cache + assert hash("asdf.github.io") in dnsengine._wildcard_cache assert "github.io" in wildcard_domains assert "A" in wildcard_domains["github.io"] assert "SRV" not in wildcard_domains["github.io"] assert wildcard_domains["github.io"]["A"] and all(helpers.is_ip(r) for r in wildcard_domains["github.io"]["A"]) - helpers.dns._wildcard_cache.clear() + dnsengine._wildcard_cache.clear() - 
wildcard_rdtypes = await helpers.is_wildcard("blacklanternsecurity.github.io") + wildcard_rdtypes = await dnsengine.is_wildcard("blacklanternsecurity.github.io") assert "A" in wildcard_rdtypes assert "SRV" not in wildcard_rdtypes assert wildcard_rdtypes["A"] == (True, "github.io") - assert hash("github.io") in helpers.dns._wildcard_cache - assert len(helpers.dns._wildcard_cache[hash("github.io")]) > 0 - helpers.dns._wildcard_cache.clear() + assert hash("github.io") in dnsengine._wildcard_cache + assert len(dnsengine._wildcard_cache[hash("github.io")]) > 0 + dnsengine._wildcard_cache.clear() - wildcard_rdtypes = await helpers.is_wildcard("asdf.asdf.asdf.github.io") + wildcard_rdtypes = await dnsengine.is_wildcard("asdf.asdf.asdf.github.io") assert "A" in wildcard_rdtypes assert "SRV" not in wildcard_rdtypes assert wildcard_rdtypes["A"] == (True, "github.io") - assert hash("github.io") in helpers.dns._wildcard_cache - assert not hash("asdf.github.io") in helpers.dns._wildcard_cache - assert not hash("asdf.asdf.github.io") in helpers.dns._wildcard_cache - assert not hash("asdf.asdf.asdf.github.io") in helpers.dns._wildcard_cache - assert len(helpers.dns._wildcard_cache[hash("github.io")]) > 0 + assert hash("github.io") in dnsengine._wildcard_cache + assert not hash("asdf.github.io") in dnsengine._wildcard_cache + assert not hash("asdf.asdf.github.io") in dnsengine._wildcard_cache + assert not hash("asdf.asdf.asdf.github.io") in dnsengine._wildcard_cache + assert len(dnsengine._wildcard_cache[hash("github.io")]) > 0 wildcard_event1 = scan.make_event("wat.asdf.fdsa.github.io", "DNS_NAME", dummy=True) wildcard_event2 = scan.make_event("wats.asd.fdsa.github.io", "DNS_NAME", dummy=True) wildcard_event3 = scan.make_event("github.io", "DNS_NAME", dummy=True) @@ -147,6 +188,7 @@ async def test_wildcards(bbot_scanner): assert "srv-wildcard" not in wildcard_event2.tags assert wildcard_event1.data == "_wildcard.github.io" assert wildcard_event2.data == "_wildcard.github.io" - 
assert "wildcard-domain" in wildcard_event3.tags - assert "a-wildcard-domain" in wildcard_event3.tags - assert "srv-wildcard-domain" not in wildcard_event3.tags + # TODO: re-enable this? + # assert "wildcard-domain" in wildcard_event3.tags + # assert "a-wildcard-domain" in wildcard_event3.tags + # assert "srv-wildcard-domain" not in wildcard_event3.tags diff --git a/bbot/test/test_step_1/test_manager_deduplication.py b/bbot/test/test_step_1/test_manager_deduplication.py index 435e57991e..63305d0e4e 100644 --- a/bbot/test/test_step_1/test_manager_deduplication.py +++ b/bbot/test/test_step_1/test_manager_deduplication.py @@ -61,7 +61,7 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) scan.modules["per_hostport_only"] = per_hostport_only scan.modules["per_domain_only"] = per_domain_only if _dns_mock: - mock_dns(scan, _dns_mock) + await scan.helpers.dns._mock_dns(_dns_mock) if scan_callback is not None: scan_callback(scan) return ( diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 3b9f57767d..f3b589d7c9 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -799,7 +799,7 @@ async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog, mock_dns whitelist=["127.0.0.0/29", "test.notreal"], blacklist=["127.0.0.64/29"], ) - mock_dns(scan, { + await scan.helpers.dns._mock_dns({ "www-prod.test.notreal": {"A": ["127.0.0.66"]}, "www-dev.test.notreal": {"A": ["127.0.0.22"]}, }) diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 2fada79687..7b2997f650 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -326,8 +326,7 @@ async def handle_event(self, event): output_modules=["python"], force_start=True, ) - mock_dns( - scan, + await scan.helpers.dns._mock_dns( { "evilcorp.com": 
{"A": ["127.0.254.1"]}, "www.evilcorp.com": {"A": ["127.0.254.2"]}, diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index 3f1c01c044..ace0cad7b1 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -59,7 +59,7 @@ async def test_scan( # make sure DNS resolution works scan4 = bbot_scanner("1.1.1.1", config={"dns_resolution": True}) - mock_dns(scan4, dns_table) + await scan4.helpers.dns._mock_dns(dns_table) events = [] async for event in scan4.async_start(): events.append(event) @@ -68,7 +68,7 @@ async def test_scan( # make sure it doesn't work when you turn it off scan5 = bbot_scanner("1.1.1.1", config={"dns_resolution": False}) - mock_dns(scan5, dns_table) + await scan5.helpers.dns._mock_dns(dns_table) events = [] async for event in scan5.async_start(): events.append(event) diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index a4ea06f81d..8db36cd8d5 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ b/bbot/test/test_step_2/module_tests/base.py @@ -91,10 +91,10 @@ def set_expect_requests(self, expect_args={}, respond_args={}): def set_expect_requests_handler(self, expect_args=None, request_handler=None): self.httpserver.expect_request(expect_args).respond_with_handler(request_handler) - def mock_dns(self, mock_data, scan=None): + async def mock_dns(self, mock_data, scan=None): if scan is None: scan = self.scan - scan.helpers.dns.resolver = MockResolver(mock_data) + await scan.helpers.dns._mock_dns(mock_data) @property def module(self): diff --git a/bbot/test/test_step_2/module_tests/test_module_affiliates.py b/bbot/test/test_step_2/module_tests/test_module_affiliates.py index 4afd4cd29e..b138dce652 100644 --- a/bbot/test/test_step_2/module_tests/test_module_affiliates.py +++ b/bbot/test/test_step_2/module_tests/test_module_affiliates.py @@ -6,7 +6,7 @@ class TestAffiliates(ModuleTestBase): config_overrides = {"dns_resolution":
True} async def setup_before_prep(self, module_test): - module_test.mock_dns( + await module_test.mock_dns( { "8.8.8.8.in-addr.arpa": {"PTR": ["dns.google"]}, "dns.google": {"A": ["8.8.8.8"], "NS": ["ns1.zdns.google"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_aggregate.py b/bbot/test/test_step_2/module_tests/test_module_aggregate.py index 7a41fe0229..a41c60701a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_aggregate.py +++ b/bbot/test/test_step_2/module_tests/test_module_aggregate.py @@ -5,7 +5,7 @@ class TestAggregate(ModuleTestBase): config_overrides = {"dns_resolution": True, "scope_report_distance": 1} async def setup_before_prep(self, module_test): - module_test.mock_dns({"blacklanternsecurity.com": {"A": ["1.2.3.4"]}}) + await module_test.mock_dns({"blacklanternsecurity.com": {"A": ["1.2.3.4"]}}) def check(self, module_test, events): filename = next(module_test.scan.home.glob("scan-stats-table*.txt")) diff --git a/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py b/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py index af46ad5ba1..6b6c78dbf0 100644 --- a/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py +++ b/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py @@ -8,7 +8,7 @@ class TestAsset_Inventory(ModuleTestBase): modules_overrides = ["asset_inventory", "nmap", "sslcert"] async def setup_before_prep(self, module_test): - module_test.mock_dns( + await module_test.mock_dns( { "1.0.0.127.in-addr.arpa": {"PTR": ["www.bbottest.notreal"]}, "www.bbottest.notreal": {"A": ["127.0.0.1"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_baddns.py b/bbot/test/test_step_2/module_tests/test_module_baddns.py index 57cca7d5fc..dd533669f2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_baddns.py +++ b/bbot/test/test_step_2/module_tests/test_module_baddns.py @@ -24,7 +24,7 @@ async def setup_after_prep(self, module_test): from bbot.modules import baddns 
as baddns_module from baddns.lib.whoismanager import WhoisManager - module_test.mock_dns( + await module_test.mock_dns( {"bad.dns": {"CNAME": ["baddns.azurewebsites.net."]}, "_NXDOMAIN": ["baddns.azurewebsites.net"]} ) module_test.monkeypatch.setattr(baddns_module.baddns, "select_modules", self.select_modules) @@ -52,7 +52,7 @@ def set_target(self, target): respond_args = {"response_data": "

Oops! We couldn’t find that page.

", "status": 200} module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - module_test.mock_dns( + await module_test.mock_dns( {"bad.dns": {"CNAME": ["baddns.bigcartel.com."]}, "baddns.bigcartel.com": {"A": ["127.0.0.1"]}} ) module_test.monkeypatch.setattr(baddns_module.baddns, "select_modules", self.select_modules) diff --git a/bbot/test/test_step_2/module_tests/test_module_baddns_zone.py b/bbot/test/test_step_2/module_tests/test_module_baddns_zone.py index 349db8ed8b..b3810e75a5 100644 --- a/bbot/test/test_step_2/module_tests/test_module_baddns_zone.py +++ b/bbot/test/test_step_2/module_tests/test_module_baddns_zone.py @@ -31,7 +31,7 @@ def from_xfr(*args, **kwargs): zone = dns.zone.from_text(zone_text, origin="bad.dns.") return zone - module_test.mock_dns({"bad.dns": {"NS": ["ns1.bad.dns."]}, "ns1.bad.dns": {"A": ["127.0.0.1"]}}) + await module_test.mock_dns({"bad.dns": {"NS": ["ns1.bad.dns."]}, "ns1.bad.dns": {"A": ["127.0.0.1"]}}) module_test.monkeypatch.setattr("dns.zone.from_xfr", from_xfr) module_test.monkeypatch.setattr(WhoisManager, "dispatchWHOIS", self.dispatchWHOIS) @@ -46,7 +46,7 @@ class TestBaddns_zone_nsec(BaseTestBaddns_zone): async def setup_after_prep(self, module_test): from baddns.lib.whoismanager import WhoisManager - module_test.mock_dns( + await module_test.mock_dns( { "bad.dns": {"NSEC": ["asdf.bad.dns"]}, "asdf.bad.dns": {"NSEC": ["zzzz.bad.dns"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_dehashed.py b/bbot/test/test_step_2/module_tests/test_module_dehashed.py index 34c73de825..73260f3275 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dehashed.py +++ b/bbot/test/test_step_2/module_tests/test_module_dehashed.py @@ -48,7 +48,7 @@ async def setup_before_prep(self, module_test): url=f"https://api.dehashed.com/search?query=domain:blacklanternsecurity.com&size=10000&page=1", json=dehashed_domain_response, ) - module_test.mock_dns( + await module_test.mock_dns( { "bob.com": {"A": 
["127.0.0.1"]}, "blacklanternsecurity.com": {"A": ["127.0.0.1"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py b/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py index aaf26664cc..5850fbd495 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py @@ -6,7 +6,7 @@ class TestDNSCommonSRV(ModuleTestBase): config_overrides = {"dns_resolution": True} async def setup_after_prep(self, module_test): - module_test.mock_dns( + await module_test.mock_dns( { "_ldap._tcp.gc._msdcs.blacklanternsecurity.notreal": { "SRV": ["0 100 3268 asdf.blacklanternsecurity.notreal"] diff --git a/bbot/test/test_step_2/module_tests/test_module_internetdb.py b/bbot/test/test_step_2/module_tests/test_module_internetdb.py index d24cdebc09..54ec6a163c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_internetdb.py +++ b/bbot/test/test_step_2/module_tests/test_module_internetdb.py @@ -5,7 +5,7 @@ class TestInternetDB(ModuleTestBase): config_overrides = {"dns_resolution": True} async def setup_before_prep(self, module_test): - module_test.mock_dns( + await module_test.mock_dns( { "blacklanternsecurity.com": {"A": ["1.2.3.4"]}, "autodiscover.blacklanternsecurity.com": {"A": ["2.3.4.5"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_ipneighbor.py b/bbot/test/test_step_2/module_tests/test_module_ipneighbor.py index b8ba8331a7..edb7dbff6d 100644 --- a/bbot/test/test_step_2/module_tests/test_module_ipneighbor.py +++ b/bbot/test/test_step_2/module_tests/test_module_ipneighbor.py @@ -6,7 +6,7 @@ class TestIPNeighbor(ModuleTestBase): config_overrides = {"scope_report_distance": 1, "dns_resolution": True, "scope_dns_search_distance": 2} async def setup_after_prep(self, module_test): - module_test.mock_dns( + await module_test.mock_dns( {"3.0.0.127.in-addr.arpa": {"PTR": ["asdf.www.bls.notreal"]}, "asdf.www.bls.notreal": {"A": ["127.0.0.3"]}} ) diff --git 
a/bbot/test/test_step_2/module_tests/test_module_postman.py b/bbot/test/test_step_2/module_tests/test_module_postman.py index 21f464054d..8e9c0f3bfc 100644 --- a/bbot/test/test_step_2/module_tests/test_module_postman.py +++ b/bbot/test/test_step_2/module_tests/test_module_postman.py @@ -235,7 +235,7 @@ async def new_emit_event(event_data, event_type, **kwargs): await old_emit_event(event_data, event_type, **kwargs) module_test.monkeypatch.setattr(module_test.module, "emit_event", new_emit_event) - module_test.mock_dns({"asdf.blacklanternsecurity.com": {"A": ["127.0.0.1"]}}) + await module_test.mock_dns({"asdf.blacklanternsecurity.com": {"A": ["127.0.0.1"]}}) request_args = dict(uri="/_api/request/28129865-987c8ac8-bfa9-4bab-ade9-88ccf0597862") respond_args = dict(response_data="https://asdf.blacklanternsecurity.com") diff --git a/bbot/test/test_step_2/module_tests/test_module_speculate.py b/bbot/test/test_step_2/module_tests/test_module_speculate.py index 2dcafaddcc..2f7d6b7f35 100644 --- a/bbot/test/test_step_2/module_tests/test_module_speculate.py +++ b/bbot/test/test_step_2/module_tests/test_module_speculate.py @@ -28,7 +28,7 @@ class TestSpeculate_OpenPorts(ModuleTestBase): config_overrides = {"speculate": True} async def setup_before_prep(self, module_test): - module_test.mock_dns( + await module_test.mock_dns( { "evilcorp.com": {"A": ["127.0.254.1"]}, "asdf.evilcorp.com": {"A": ["127.0.254.2"]}, From 2649425f80fb54d1e91f93b59193bf33f6cf513a Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 12:22:39 -0400 Subject: [PATCH 02/63] blacked --- bbot/core/helpers/dns/mock.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/core/helpers/dns/mock.py b/bbot/core/helpers/dns/mock.py index 0685a8e80d..de283d5172 100644 --- a/bbot/core/helpers/dns/mock.py +++ b/bbot/core/helpers/dns/mock.py @@ -1,5 +1,6 @@ import dns + class MockResolver: def __init__(self, mock_data=None): From ec2f89a67ec8d6540e689702ab620e9df9ce32ab Mon Sep 17 00:00:00 2001 
From: TheTechromancer Date: Tue, 2 Apr 2024 12:27:21 -0400 Subject: [PATCH 03/63] add pyzmq dependency --- poetry.lock | 107 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index a12258088f..818842c3a0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1901,6 +1901,111 @@ files = [ [package.dependencies] pyyaml = "*" +[[package]] +name = "pyzmq" +version = "25.1.2" +description = "Python bindings for 0MQ" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4"}, + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49151b0efece79f6a79d41a461d78535356136ee70084a1c22532fc6383f4ad0"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9a5f194cf730f2b24d6af1f833c14c10f41023da46a7f736f48b6d35061e76e"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:faf79a302f834d9e8304fafdc11d0d042266667ac45209afa57e5efc998e3872"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f51a7b4ead28d3fca8dda53216314a553b0f7a91ee8fc46a72b402a78c3e43d"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0ddd6d71d4ef17ba5a87becf7ddf01b371eaba553c603477679ae817a8d84d75"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:246747b88917e4867e2367b005fc8eefbb4a54b7db363d6c92f89d69abfff4b6"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:00c48ae2fd81e2a50c3485de1b9d5c7c57cd85dc8ec55683eac16846e57ac979"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a68d491fc20762b630e5db2191dd07ff89834086740f70e978bb2ef2668be08"}, + {file = "pyzmq-25.1.2-cp310-cp310-win32.whl", 
hash = "sha256:09dfe949e83087da88c4a76767df04b22304a682d6154de2c572625c62ad6886"}, + {file = "pyzmq-25.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:fa99973d2ed20417744fca0073390ad65ce225b546febb0580358e36aa90dba6"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:82544e0e2d0c1811482d37eef297020a040c32e0687c1f6fc23a75b75db8062c"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:01171fc48542348cd1a360a4b6c3e7d8f46cdcf53a8d40f84db6707a6768acc1"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc69c96735ab501419c432110016329bf0dea8898ce16fab97c6d9106dc0b348"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3e124e6b1dd3dfbeb695435dff0e383256655bb18082e094a8dd1f6293114642"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7598d2ba821caa37a0f9d54c25164a4fa351ce019d64d0b44b45540950458840"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d1299d7e964c13607efd148ca1f07dcbf27c3ab9e125d1d0ae1d580a1682399d"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4e6f689880d5ad87918430957297c975203a082d9a036cc426648fcbedae769b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cc69949484171cc961e6ecd4a8911b9ce7a0d1f738fcae717177c231bf77437b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9880078f683466b7f567b8624bfc16cad65077be046b6e8abb53bed4eeb82dd3"}, + {file = "pyzmq-25.1.2-cp311-cp311-win32.whl", hash = "sha256:4e5837af3e5aaa99a091302df5ee001149baff06ad22b722d34e30df5f0d9097"}, + {file = "pyzmq-25.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:25c2dbb97d38b5ac9fd15586e048ec5eb1e38f3d47fe7d92167b0c77bb3584e9"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_15_universal2.whl", hash = 
"sha256:11e70516688190e9c2db14fcf93c04192b02d457b582a1f6190b154691b4c93a"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:313c3794d650d1fccaaab2df942af9f2c01d6217c846177cfcbc693c7410839e"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3cbba2f47062b85fe0ef9de5b987612140a9ba3a9c6d2543c6dec9f7c2ab27"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc31baa0c32a2ca660784d5af3b9487e13b61b3032cb01a115fce6588e1bed30"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c9087b109070c5ab0b383079fa1b5f797f8d43e9a66c07a4b8b8bdecfd88ee"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f8429b17cbb746c3e043cb986328da023657e79d5ed258b711c06a70c2ea7537"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5074adeacede5f810b7ef39607ee59d94e948b4fd954495bdb072f8c54558181"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7ae8f354b895cbd85212da245f1a5ad8159e7840e37d78b476bb4f4c3f32a9fe"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b264bf2cc96b5bc43ce0e852be995e400376bd87ceb363822e2cb1964fcdc737"}, + {file = "pyzmq-25.1.2-cp312-cp312-win32.whl", hash = "sha256:02bbc1a87b76e04fd780b45e7f695471ae6de747769e540da909173d50ff8e2d"}, + {file = "pyzmq-25.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:ced111c2e81506abd1dc142e6cd7b68dd53747b3b7ae5edbea4578c5eeff96b7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7b6d09a8962a91151f0976008eb7b29b433a560fde056ec7a3db9ec8f1075438"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967668420f36878a3c9ecb5ab33c9d0ff8d054f9c0233d995a6d25b0e95e1b6b"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:5edac3f57c7ddaacdb4d40f6ef2f9e299471fc38d112f4bc6d60ab9365445fb0"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0dabfb10ef897f3b7e101cacba1437bd3a5032ee667b7ead32bbcdd1a8422fe7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2c6441e0398c2baacfe5ba30c937d274cfc2dc5b55e82e3749e333aabffde561"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:16b726c1f6c2e7625706549f9dbe9b06004dfbec30dbed4bf50cbdfc73e5b32a"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:a86c2dd76ef71a773e70551a07318b8e52379f58dafa7ae1e0a4be78efd1ff16"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win32.whl", hash = "sha256:359f7f74b5d3c65dae137f33eb2bcfa7ad9ebefd1cab85c935f063f1dbb245cc"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:55875492f820d0eb3417b51d96fea549cde77893ae3790fd25491c5754ea2f68"}, + {file = "pyzmq-25.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8c8a419dfb02e91b453615c69568442e897aaf77561ee0064d789705ff37a92"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8807c87fa893527ae8a524c15fc505d9950d5e856f03dae5921b5e9aa3b8783b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5e319ed7d6b8f5fad9b76daa0a68497bc6f129858ad956331a5835785761e003"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3c53687dde4d9d473c587ae80cc328e5b102b517447456184b485587ebd18b62"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9add2e5b33d2cd765ad96d5eb734a5e795a0755f7fc49aa04f76d7ddda73fd70"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e690145a8c0c273c28d3b89d6fb32c45e0d9605b2293c10e650265bf5c11cfec"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win32.whl", hash = "sha256:0f97bc2f1f13cb16905a5f3e1fbdf100e712d841482b2237484360f8bc4cb3d7"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6cc0020b74b2e410287e5942e1e10886ff81ac77789eb20bec13f7ae681f0fdd"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:bef02cfcbded83473bdd86dd8d3729cd82b2e569b75844fb4ea08fee3c26ae41"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e10a4b5a4b1192d74853cc71a5e9fd022594573926c2a3a4802020360aa719d8"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8c5f80e578427d4695adac6fdf4370c14a2feafdc8cb35549c219b90652536ae"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5dde6751e857910c1339890f3524de74007958557593b9e7e8c5f01cd919f8a7"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea1608dd169da230a0ad602d5b1ebd39807ac96cae1845c3ceed39af08a5c6df"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0f513130c4c361201da9bc69df25a086487250e16b5571ead521b31ff6b02220"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:019744b99da30330798bb37df33549d59d380c78e516e3bab9c9b84f87a9592f"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2e2713ef44be5d52dd8b8e2023d706bf66cb22072e97fc71b168e01d25192755"}, + {file = "pyzmq-25.1.2-cp38-cp38-win32.whl", hash = "sha256:07cd61a20a535524906595e09344505a9bd46f1da7a07e504b315d41cd42eb07"}, + {file = "pyzmq-25.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb7e49a17fb8c77d3119d41a4523e432eb0c6932187c37deb6fbb00cc3028088"}, + {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:94504ff66f278ab4b7e03e4cba7e7e400cb73bfa9d3d71f58d8972a8dc67e7a6"}, + {file = 
"pyzmq-25.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6dd0d50bbf9dca1d0bdea219ae6b40f713a3fb477c06ca3714f208fd69e16fd8"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c0b5ca88a8928147b7b1e2dfa09f3b6c256bc1135a1338536cbc9ea13d3b7add"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9a79f1d2495b167119d02be7448bfba57fad2a4207c4f68abc0bab4b92925b"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:518efd91c3d8ac9f9b4f7dd0e2b7b8bf1a4fe82a308009016b07eaa48681af82"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1ec23bd7b3a893ae676d0e54ad47d18064e6c5ae1fadc2f195143fb27373f7f6"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db36c27baed588a5a8346b971477b718fdc66cf5b80cbfbd914b4d6d355e44e2"}, + {file = "pyzmq-25.1.2-cp39-cp39-win32.whl", hash = "sha256:39b1067f13aba39d794a24761e385e2eddc26295826530a8c7b6c6c341584289"}, + {file = "pyzmq-25.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:8e9f3fabc445d0ce320ea2c59a75fe3ea591fdbdeebec5db6de530dd4b09412e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a8c1d566344aee826b74e472e16edae0a02e2a044f14f7c24e123002dcff1c05"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:759cfd391a0996345ba94b6a5110fca9c557ad4166d86a6e81ea526c376a01e8"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c61e346ac34b74028ede1c6b4bcecf649d69b707b3ff9dc0fab453821b04d1e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cb8fc1f8d69b411b8ec0b5f1ffbcaf14c1db95b6bccea21d83610987435f1a4"}, + 
{file = "pyzmq-25.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3c00c9b7d1ca8165c610437ca0c92e7b5607b2f9076f4eb4b095c85d6e680a1d"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:df0c7a16ebb94452d2909b9a7b3337940e9a87a824c4fc1c7c36bb4404cb0cde"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:45999e7f7ed5c390f2e87ece7f6c56bf979fb213550229e711e45ecc7d42ccb8"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ac170e9e048b40c605358667aca3d94e98f604a18c44bdb4c102e67070f3ac9b"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b604734bec94f05f81b360a272fc824334267426ae9905ff32dc2be433ab96"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a793ac733e3d895d96f865f1806f160696422554e46d30105807fdc9841b9f7d"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0806175f2ae5ad4b835ecd87f5f85583316b69f17e97786f7443baaf54b9bb98"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ef12e259e7bc317c7597d4f6ef59b97b913e162d83b421dd0db3d6410f17a244"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea253b368eb41116011add00f8d5726762320b1bda892f744c91997b65754d73"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b9b1f2ad6498445a941d9a4fee096d387fee436e45cc660e72e768d3d8ee611"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8b14c75979ce932c53b79976a395cb2a8cd3aaf14aef75e8c2cb55a330b9b49d"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:889370d5174a741a62566c003ee8ddba4b04c3f09a97b8000092b7ca83ec9c49"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9a18fff090441a40ffda8a7f4f18f03dc56ae73f148f1832e109f9bffa85df15"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99a6b36f95c98839ad98f8c553d8507644c880cf1e0a57fe5e3a3f3969040882"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4345c9a27f4310afbb9c01750e9461ff33d6fb74cd2456b107525bbeebcb5be3"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3516e0b6224cf6e43e341d56da15fd33bdc37fa0c06af4f029f7d7dfceceabbc"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:146b9b1f29ead41255387fb07be56dc29639262c0f7344f570eecdcd8d683314"}, + {file = "pyzmq-25.1.2.tar.gz", hash = "sha256:93f1aa311e8bb912e34f004cf186407a4e90eec4f0ecc0efd26056bf7eda0226"}, +] + +[package.dependencies] +cffi = {version = "*", markers = "implementation_name == \"pypy\""} + [[package]] name = "regex" version = "2023.12.25" @@ -2431,4 +2536,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "30069bdb734215feeee60b9bbafeb5c7c8708044ea92c25465370dbad31d808f" +content-hash = "07cc1440e8eda2b5dc94d6cff919e332e36ea728fe786f217f17a25a64922683" diff --git a/pyproject.toml b/pyproject.toml index ecd8c18d61..70c839795a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ tldextract = "^5.1.1" cachetools = "^5.3.2" socksio = "^1.0.0" jinja2 = "^3.1.3" +pyzmq = "^25.1.2" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 42232d020ce64b8114420896cdfdf6b2e066fb17 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 13:01:20 -0400 Subject: [PATCH 04/63] fix dns mocker --- bbot/core/config/logger.py | 6 +++++- bbot/core/engine.py | 2 +- bbot/core/helpers/dns/engine.py | 1 + bbot/core/helpers/dns/mock.py | 14 +++++++------- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git 
a/bbot/core/config/logger.py b/bbot/core/config/logger.py index 3844a65fcf..276993ead5 100644 --- a/bbot/core/config/logger.py +++ b/bbot/core/config/logger.py @@ -59,6 +59,7 @@ def __init__(self, core): self._loggers = None self._log_handlers = None self._log_level = None + self.root_logger = logging.getLogger() self.core_logger = logging.getLogger("bbot") self.core = core @@ -79,7 +80,10 @@ def setup_queue_handler(self, logging_queue=None): else: self.queue = logging_queue self.queue_handler = logging.handlers.QueueHandler(logging_queue) - logging.getLogger().addHandler(self.queue_handler) + + if self.queue_handler not in self.root_logger.handlers: + self.root_logger.addHandler(self.queue_handler) + self.core_logger.setLevel(self.log_level) # disable asyncio logging for child processes if self.process_name != "MainProcess": diff --git a/bbot/core/engine.py b/bbot/core/engine.py index 5f4530ba17..cf9b90f287 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -155,7 +155,7 @@ async def worker(self): try: while 1: client_id, binary = await self.socket.recv_multipart() - self.log.debug(f"{self.name} got binary: {binary}") + # self.log.debug(f"{self.name} got binary: {binary}") message = pickle.loads(binary) self.log.debug(f"{self.name} got message: {message}") diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 54a60b3280..470311b485 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -933,6 +933,7 @@ def in_tests(self): return os.getenv("BBOT_TESTING", "") == "True" async def _mock_dns(self, mock_data): + self.log.critical(f"SETTING MOCK RESOLVER") from .mock import MockResolver self.resolver = MockResolver(mock_data) diff --git a/bbot/core/helpers/dns/mock.py b/bbot/core/helpers/dns/mock.py index de283d5172..70d978affe 100644 --- a/bbot/core/helpers/dns/mock.py +++ b/bbot/core/helpers/dns/mock.py @@ -17,7 +17,7 @@ def create_dns_response(self, query_name, rdtype): query_name = 
query_name.strip(".") answers = self.mock_data.get(query_name, {}).get(rdtype, []) if not answers: - raise self.dns.resolver.NXDOMAIN(f"No answer found for {query_name} {rdtype}") + raise dns.resolver.NXDOMAIN(f"No answer found for {query_name} {rdtype}") message_text = f"""id 1234 opcode QUERY @@ -30,7 +30,7 @@ def create_dns_response(self, query_name, rdtype): message_text += f"\n{query_name}. 1 IN {rdtype} {answer}" message_text += "\n;AUTHORITY\n;ADDITIONAL\n" - message = self.dns.message.from_text(message_text) + message = dns.message.from_text(message_text) return message async def resolve(self, query_name, rdtype=None): @@ -41,16 +41,16 @@ async def resolve(self, query_name, rdtype=None): else: rdtype = str(rdtype.name).upper() - domain_name = self.dns.name.from_text(query_name) - rdtype_obj = self.dns.rdatatype.from_text(rdtype) + domain_name = dns.name.from_text(query_name) + rdtype_obj = dns.rdatatype.from_text(rdtype) if "_NXDOMAIN" in self.mock_data and query_name in self.mock_data["_NXDOMAIN"]: # Simulate the NXDOMAIN exception - raise self.dns.resolver.NXDOMAIN + raise dns.resolver.NXDOMAIN try: response = self.create_dns_response(query_name, rdtype) - answer = self.dns.resolver.Answer(domain_name, rdtype_obj, self.dns.rdataclass.IN, response) + answer = dns.resolver.Answer(domain_name, rdtype_obj, dns.rdataclass.IN, response) return answer - except self.dns.resolver.NXDOMAIN: + except dns.resolver.NXDOMAIN: return [] From 2a1941797118c5701522aab499bce43d0c9476c8 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 13:33:07 -0400 Subject: [PATCH 05/63] fix test_event --- bbot/test/bbot_fixtures.py | 6 ------ bbot/test/test_step_1/test_modules_basic.py | 5 ----- 2 files changed, 11 deletions(-) diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 29c27fa933..4452cc28f7 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -74,12 +74,6 @@ def scan(monkeypatch): from bbot.scanner import Scanner 
bbot_scan = Scanner("127.0.0.1", modules=["ipneighbor"]) - - fallback_nameservers_file = bbot_scan.helpers.bbot_home / "fallback_nameservers.txt" - with open(fallback_nameservers_file, "w") as f: - f.write("8.8.8.8\n") - monkeypatch.setattr(bbot_scan.helpers.dns, "fallback_nameservers_file", fallback_nameservers_file) - return bbot_scan diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 7b2997f650..b1891ce055 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -10,10 +10,6 @@ @pytest.mark.asyncio async def test_modules_basic(scan, helpers, events, bbot_scanner, httpx_mock): - fallback_nameservers = scan.helpers.temp_dir / "nameservers.txt" - with open(fallback_nameservers, "w") as f: - f.write("8.8.8.8\n") - for http_method in ("GET", "CONNECT", "HEAD", "POST", "PUT", "TRACE", "DEBUG", "PATCH", "DELETE", "OPTIONS"): httpx_mock.add_response(method=http_method, url=re.compile(r".*"), json={"test": "test"}) @@ -85,7 +81,6 @@ async def test_modules_basic(scan, helpers, events, bbot_scanner, httpx_mock): config={i: True for i in available_internal_modules}, force_start=True, ) - scan2.helpers.dns.fallback_nameservers_file = fallback_nameservers await scan2.load_modules() scan2.status = "RUNNING" From cd015a6aca3f6892d839a444934a4d3dae0aae9c Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 13:41:33 -0400 Subject: [PATCH 06/63] working on tests --- .../test_step_1/test_manager_deduplication.py | 26 +------------------ .../test_manager_scope_accuracy.py | 6 ++--- bbot/test/test_step_1/test_scan.py | 1 - 3 files changed, 4 insertions(+), 29 deletions(-) diff --git a/bbot/test/test_step_1/test_manager_deduplication.py b/bbot/test/test_step_1/test_manager_deduplication.py index 63305d0e4e..d3221d554d 100644 --- a/bbot/test/test_step_1/test_manager_deduplication.py +++ b/bbot/test/test_step_1/test_manager_deduplication.py @@ -3,7 +3,7 @@ 
@pytest.mark.asyncio -async def test_manager_deduplication(bbot_scanner, mock_dns): +async def test_manager_deduplication(bbot_scanner): class DefaultModule(BaseModule): _name = "default_module" @@ -90,30 +90,6 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) _dns_mock=dns_mock_chain, ) - # SCAN / severe_jacqueline (SCAN:d233093d39044c961754c97f749fa758543d7474) - # DNS_NAME / test.notreal - # OPEN_TCP_PORT / default_module.test.notreal:88 - # OPEN_TCP_PORT / test.notreal:88 - # DNS_NAME / default_module.test.notreal - # OPEN_TCP_PORT / no_suppress_dupes.test.notreal:88 - # OPEN_TCP_PORT / accept_dupes.test.notreal:88 - # DNS_NAME / per_hostport_only.test.notreal - # DNS_NAME / no_suppress_dupes.test.notreal - # OPEN_TCP_PORT / no_suppress_dupes.test.notreal:88 - # DNS_NAME / no_suppress_dupes.test.notreal - # DNS_NAME / no_suppress_dupes.test.notreal - # DNS_NAME / accept_dupes.test.notreal - # OPEN_TCP_PORT / per_domain_only.test.notreal:88 - # DNS_NAME / no_suppress_dupes.test.notreal - # DNS_NAME / no_suppress_dupes.test.notreal - # OPEN_TCP_PORT / no_suppress_dupes.test.notreal:88 - # OPEN_TCP_PORT / no_suppress_dupes.test.notreal:88 - # DNS_NAME / per_domain_only.test.notreal - # OPEN_TCP_PORT / per_hostport_only.test.notreal:88 - # OPEN_TCP_PORT / no_suppress_dupes.test.notreal:88 - - for e in events: - log.critical(f"{e.type} / {e.data} / {e.module} / {e.source.data} / {e.source.module}") assert len(events) == 21 assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "accept_dupes.test.notreal" and str(e.module) == "accept_dupes"]) assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "default_module.test.notreal" and str(e.module) == "default_module"]) diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index f3b589d7c9..f111dbf947 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ 
b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -31,7 +31,7 @@ def bbot_other_httpservers(): @pytest.mark.asyncio -async def test_manager_scope_accuracy(bbot_scanner, bbot_httpserver, bbot_other_httpservers, bbot_httpserver_ssl, mock_dns): +async def test_manager_scope_accuracy(bbot_scanner, bbot_httpserver, bbot_other_httpservers, bbot_httpserver_ssl): """ This test ensures that BBOT correctly handles different scope distance settings. It performs these tests for normal modules, output modules, and their graph variants, @@ -101,7 +101,7 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) scan.modules["dummy_graph_output_module"] = dummy_graph_output_module scan.modules["dummy_graph_batch_output_module"] = dummy_graph_batch_output_module if _dns_mock: - mock_dns(scan, _dns_mock) + await scan.helpers.dns._mock_dns(_dns_mock) if scan_callback is not None: scan_callback(scan) return ( @@ -787,7 +787,7 @@ def custom_setup(scan): @pytest.mark.asyncio -async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog, mock_dns): +async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog): bbot_httpserver.expect_request(uri="/").respond_with_data(response_data="") diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index ace0cad7b1..e5648c4ee0 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -7,7 +7,6 @@ async def test_scan( helpers, monkeypatch, bbot_scanner, - mock_dns, ): scan0 = bbot_scanner( "1.1.1.1/31", From 45f8f1a3063b9bbc4347bf13d76e693d5c20b6e0 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 14:22:02 -0400 Subject: [PATCH 07/63] more work on tests --- bbot/core/helpers/dns/dns.py | 24 ++++++++++- bbot/core/helpers/dns/engine.py | 42 +++++++------------ .../test_manager_scope_accuracy.py | 1 + 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py 
b/bbot/core/helpers/dns/dns.py index 1a9474277d..dd1d61e751 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -2,8 +2,10 @@ import logging import dns.exception import dns.asyncresolver +from contextlib import suppress from bbot.core.engine import EngineClient +from bbot.core.errors import ValidationError from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host from .engine import DNSEngine @@ -93,7 +95,27 @@ async def resolve_event(self, event, minimal=False): event_host = str(event.host) event_type = str(event.type) kwargs = {"event_host": event_host, "event_type": event_type, "minimal": minimal} - return await self.run_and_return("resolve_event", **kwargs) + event_tags, dns_children = await self.run_and_return("resolve_event", **kwargs) + + # whitelisting / blacklisting based on resolved hosts + event_whitelisted = False + event_blacklisted = False + for rdtype, children in dns_children.items(): + for host in children: + if rdtype in ("A", "AAAA", "CNAME"): + # having a CNAME to an in-scope resource doesn't make you in-scope + if rdtype != "CNAME": + with suppress(ValidationError): + if self.parent_helper.scan.whitelisted(host): + self.log.critical(f"{event_host} --> {host} is whitelisted") + event_whitelisted = True + # CNAME to a blacklisted resources, means you're blacklisted + with suppress(ValidationError): + if self.parent_helper.scan.blacklisted(host): + self.log.critical(f"{event_host} --> {host} is blacklisted") + event_blacklisted = True + + return event_tags, event_whitelisted, event_blacklisted, dns_children async def is_wildcard(self, query, ips=None, rdtype=None): """ diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 470311b485..8c6d26eb46 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -445,8 +445,6 @@ async def resolve_event(self, event_host, event_type, minimal=False): Returns: tuple: A 4-tuple containing the following 
items: - event_tags (set): Set of tags for the event. - - event_whitelisted (bool): Whether the event is whitelisted. - - event_blacklisted (bool): Whether the event is blacklisted. - dns_children (dict): Dictionary containing child events from DNS resolutions. Examples: @@ -461,18 +459,19 @@ async def resolve_event(self, event_host, event_type, minimal=False): log.debug(f"Resolving event {event_type}:{event_host}") event_tags = set() dns_children = dict() - event_whitelisted = False - event_blacklisted = False try: # lock to ensure resolution of the same host doesn't start while we're working here async with self._event_cache_locks.lock(event_host): # try to get data from cache - _event_tags, _event_whitelisted, _event_blacklisted, _dns_children = self.event_cache_get(event_host) - event_tags.update(_event_tags) - # if we found it, return it - if _event_whitelisted is not None: - return event_tags, _event_whitelisted, _event_blacklisted, _dns_children + try: + _event_tags, _dns_children = self._event_cache[event_host] + event_tags.update(_event_tags) + # if we found it, return it + if _event_tags is not None: + return event_tags, _dns_children + except KeyError: + _event_tags, _dns_children = set(), set() # then resolve types = () @@ -497,22 +496,11 @@ async def resolve_event(self, event_host, event_type, minimal=False): event_tags.add("resolved") event_tags.add(f"{rdtype.lower()}-record") - # whitelisting and blacklisting of IPs for r in records: for _, t in self.extract_targets(r): if t: ip = make_ip_type(t) - # TODO: transplant this - # if rdtype in ("A", "AAAA", "CNAME"): - # with contextlib.suppress(ValidationError): - # if self.parent_helper.is_ip(ip): - # if self.parent_helper.preset.whitelisted(ip): - # event_whitelisted = True - # with contextlib.suppress(ValidationError): - # if self.parent_helper.preset.blacklisted(ip): - # event_blacklisted = True - if self.filter_bad_ptrs and rdtype in ("PTR") and is_ptr(t): self.debug(f"Filtering out bad PTR: {t}") 
continue @@ -549,9 +537,9 @@ async def resolve_event(self, event_host, event_type, minimal=False): except ValueError: continue - self._event_cache[event_host] = (event_tags, event_whitelisted, event_blacklisted, dns_children) + self._event_cache[event_host] = (event_tags, dns_children) - return event_tags, event_whitelisted, event_blacklisted, dns_children + return event_tags, dns_children finally: log.debug(f"Finished resolving event {event_type}:{event_host}") @@ -566,8 +554,6 @@ def event_cache_get(self, host): Returns: tuple: A 4-tuple containing the following items: - event_tags (set): Set of tags for the event. - - event_whitelisted (bool or None): Whether the event is whitelisted. Returns None if not found. - - event_blacklisted (bool or None): Whether the event is blacklisted. Returns None if not found. - dns_children (set): Set containing child events from DNS resolutions. Examples: @@ -579,13 +565,13 @@ def event_cache_get(self, host): Assuming no event with host "www.notincache.com" has been cached: >>> event_cache_get("www.notincache.com") - (set(), None, None, set()) + (set(), set()) """ try: - event_tags, event_whitelisted, event_blacklisted, dns_children = self._event_cache[host] - return (event_tags, event_whitelisted, event_blacklisted, dns_children) + event_tags, dns_children = self._event_cache[host] + return (event_tags, dns_children) except KeyError: - return set(), None, None, set() + return set(), set() async def resolve_batch(self, queries, **kwargs): """ diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index f111dbf947..d1c17f1852 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -807,6 +807,7 @@ async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog): events = [e async for e in scan.async_start()] assert any([e for e in events if e.type == "URL_UNVERIFIED" and e.data == 
"http://www-dev.test.notreal:8888/"]) + # the hostname is in-scope, but its IP is blacklisted, therefore we shouldn't see it assert not any([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://www-prod.test.notreal:8888/"]) assert 'Omitting due to blacklisted DNS associations: URL_UNVERIFIED("http://www-prod.test.notreal:8888/"' in caplog.text From 98bbb73bcecb5f1055eabf0387d85048b7bb01f6 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 14:49:10 -0400 Subject: [PATCH 08/63] still working on tests --- bbot/core/helpers/dns/dns.py | 7 ++++--- bbot/core/helpers/dns/engine.py | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index dd1d61e751..3bcd92b10d 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -101,19 +101,20 @@ async def resolve_event(self, event, minimal=False): event_whitelisted = False event_blacklisted = False for rdtype, children in dns_children.items(): + if event_blacklisted: + break for host in children: if rdtype in ("A", "AAAA", "CNAME"): # having a CNAME to an in-scope resource doesn't make you in-scope - if rdtype != "CNAME": + if not event_whitelisted and rdtype != "CNAME": with suppress(ValidationError): if self.parent_helper.scan.whitelisted(host): - self.log.critical(f"{event_host} --> {host} is whitelisted") event_whitelisted = True # CNAME to a blacklisted resources, means you're blacklisted with suppress(ValidationError): if self.parent_helper.scan.blacklisted(host): - self.log.critical(f"{event_host} --> {host} is blacklisted") event_blacklisted = True + break return event_tags, event_whitelisted, event_blacklisted, dns_children diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 8c6d26eb46..c463391d62 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -919,7 +919,6 @@ def in_tests(self): return os.getenv("BBOT_TESTING", "") == 
"True" async def _mock_dns(self, mock_data): - self.log.critical(f"SETTING MOCK RESOLVER") from .mock import MockResolver self.resolver = MockResolver(mock_data) From 7bc49b07d3ec59f0eada06ac4d398b91b25e586f Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 15:19:41 -0400 Subject: [PATCH 09/63] logging tweaks --- bbot/core/config/logger.py | 17 +++++++++-------- bbot/test/bbot_fixtures.py | 5 ----- bbot/test/test_step_1/test_modules_basic.py | 2 +- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/bbot/core/config/logger.py b/bbot/core/config/logger.py index 276993ead5..9e4d9ed905 100644 --- a/bbot/core/config/logger.py +++ b/bbot/core/config/logger.py @@ -63,6 +63,8 @@ def __init__(self, core): self.core_logger = logging.getLogger("bbot") self.core = core + self.listener = None + self.process_name = multiprocessing.current_process().name if self.process_name == "MainProcess": self.queue = multiprocessing.Queue() @@ -81,8 +83,7 @@ def setup_queue_handler(self, logging_queue=None): self.queue = logging_queue self.queue_handler = logging.handlers.QueueHandler(logging_queue) - if self.queue_handler not in self.root_logger.handlers: - self.root_logger.addHandler(self.queue_handler) + self.root_logger.addHandler(self.queue_handler) self.core_logger.setLevel(self.log_level) # disable asyncio logging for child processes @@ -153,16 +154,16 @@ def add_log_handler(self, handler, formatter=None): return if handler.formatter is None: handler.setFormatter(debug_format) - for logger in self.loggers: - if handler not in self.listener.handlers: - logger.addHandler(handler) + if handler not in self.listener.handlers: + self.listener.handlers = self.listener.handlers + (handler,) def remove_log_handler(self, handler): if self.listener is None: return - for logger in self.loggers: - if handler in self.listener.handlers: - logger.removeHandler(handler) + if handler in self.listener.handlers: + new_handlers = list(self.listener.handlers) + 
new_handlers.remove(handler) + self.listener.handlers = tuple(new_handlers) def include_logger(self, logger): if logger not in self.loggers: diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 4452cc28f7..cf806e14dc 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -31,11 +31,6 @@ available_internal_modules = list(DEFAULT_PRESET.module_loader.configs(type="internal")) -@pytest.fixture(scope="session", autouse=True) -def setup_logging(): - CORE.logger.setup_queue_handler() - - @pytest.fixture def clean_default_config(monkeypatch): clean_config = OmegaConf.merge( diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index b1891ce055..7f01428e66 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -300,7 +300,7 @@ async def test_modules_basic_perdomainonly(scan, helpers, events, bbot_scanner, @pytest.mark.asyncio -async def test_modules_basic_stats(helpers, events, bbot_scanner, httpx_mock, monkeypatch, mock_dns): +async def test_modules_basic_stats(helpers, events, bbot_scanner, httpx_mock, monkeypatch): from bbot.modules.base import BaseModule class dummy(BaseModule): From 7a7224fb1b81f9efbb579255b42b3bf3be89afb3 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 15:37:06 -0400 Subject: [PATCH 10/63] more tests --- bbot/test/test_step_2/module_tests/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index 8db36cd8d5..7793530dff 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ b/bbot/test/test_step_2/module_tests/base.py @@ -94,7 +94,7 @@ def set_expect_requests_handler(self, expect_args=None, request_handler=None): async def mock_dns(self, mock_data, scan=None): if scan is None: scan = self.scan - await scan.helpers.dns._dns_mock(mock_data) + await 
scan.helpers.dns._mock_dns(mock_data) @property def module(self): From 9d2c093f411d72b2b139415eb7f2f4bc6bc0aada Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 20:00:41 -0400 Subject: [PATCH 11/63] reintroduce dns parallelism --- bbot/core/config/logger.py | 4 +- bbot/core/core.py | 5 +- bbot/core/engine.py | 4 +- bbot/core/helpers/dns/dns.py | 4 +- bbot/core/helpers/dns/engine.py | 217 +++++++++--------- bbot/test/test_step_1/test_dns.py | 48 ++-- .../module_tests/test_module_web_report.py | 2 - 7 files changed, 143 insertions(+), 141 deletions(-) diff --git a/bbot/core/config/logger.py b/bbot/core/config/logger.py index 9e4d9ed905..a5724599d9 100644 --- a/bbot/core/config/logger.py +++ b/bbot/core/config/logger.py @@ -76,7 +76,7 @@ def __init__(self, core): self.log_level = logging.INFO - def setup_queue_handler(self, logging_queue=None): + def setup_queue_handler(self, logging_queue=None, log_level=logging.DEBUG): if logging_queue is None: logging_queue = self.queue else: @@ -85,7 +85,7 @@ def setup_queue_handler(self, logging_queue=None): self.root_logger.addHandler(self.queue_handler) - self.core_logger.setLevel(self.log_level) + self.core_logger.setLevel(log_level) # disable asyncio logging for child processes if self.process_name != "MainProcess": logging.getLogger("asyncio").setLevel(logging.ERROR) diff --git a/bbot/core/core.py b/bbot/core/core.py index a383077635..1c43e50351 100644 --- a/bbot/core/core.py +++ b/bbot/core/core.py @@ -26,6 +26,7 @@ class BBOTProcess(multiprocessing.Process): def __init__(self, *args, **kwargs): self.logging_queue = kwargs.pop("logging_queue") + self.log_level = kwargs.pop("log_level") super().__init__(*args, **kwargs) def run(self): @@ -33,7 +34,7 @@ def run(self): try: from bbot.core import CORE - CORE.logger.setup_queue_handler(self.logging_queue) + CORE.logger.setup_queue_handler(self.logging_queue, self.log_level) super().run() except KeyboardInterrupt: log.warning(f"Got KeyboardInterrupt in 
{self.name}") @@ -165,7 +166,7 @@ def files_config(self): return self._files_config def create_process(self, *args, **kwargs): - process = self.BBOTProcess(*args, logging_queue=self.logger.queue, **kwargs) + process = self.BBOTProcess(*args, logging_queue=self.logger.queue, log_level=self.logger.log_level, **kwargs) process.daemon = True return process diff --git a/bbot/core/engine.py b/bbot/core/engine.py index cf9b90f287..3badd4d420 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -39,7 +39,7 @@ async def run_and_return(self, command, **kwargs): message = self.make_message(command, args=kwargs) await socket.send(message) binary = await socket.recv() - self.log.debug(f"{self.name}.{command}({kwargs}) got binary: {binary}") + # self.log.debug(f"{self.name}.{command}({kwargs}) got binary: {binary}") message = pickle.loads(binary) self.log.debug(f"{self.name}.{command}({kwargs}) got message: {message}") # error handling @@ -53,7 +53,7 @@ async def run_and_yield(self, command, **kwargs): await socket.send(message) while 1: binary = await socket.recv() - self.log.debug(f"{self.name}.{command}({kwargs}) got binary: {binary}") + # self.log.debug(f"{self.name}.{command}({kwargs}) got binary: {binary}") message = pickle.loads(binary) self.log.debug(f"{self.name}.{command}({kwargs}) got message: {message}") # error handling diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 3bcd92b10d..432d0c0303 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -82,8 +82,8 @@ async def resolve_batch(self, queries, **kwargs): async for _ in self.run_and_yield("resolve_batch", queries=queries, **kwargs): yield _ - async def resolve_custom_batch(self, queries): - async for _ in self.run_and_yield("resolve_custom_batch", queries=queries): + async def resolve_raw_batch(self, queries): + async for _ in self.run_and_yield("resolve_raw_batch", queries=queries): yield _ async def resolve_event(self, event, minimal=False): diff 
--git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index c463391d62..7104a732e6 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -37,7 +37,7 @@ class DNSEngine(EngineServer): 0: "resolve", 1: "resolve_event", 2: "resolve_batch", - 3: "resolve_custom_batch", + 3: "resolve_raw_batch", 4: "is_wildcard", 5: "is_wildcard_domain", 99: "_mock_dns", @@ -101,7 +101,7 @@ def __init__(self, socket_path, config={}): self.filter_bad_ptrs = self.config.get("dns_filter_ptrs", True) - async def resolve(self, query, include_errors=False, **kwargs): + async def resolve(self, query, **kwargs): """Resolve DNS names and IP addresses to their corresponding results. This is a high-level function that can translate a given domain name to its associated IP addresses @@ -125,23 +125,17 @@ async def resolve(self, query, include_errors=False, **kwargs): results = set() errors = [] try: - r = await self.resolve_raw(query, **kwargs) - if r: - raw_results, errors = r - for rdtype, answers in raw_results: - for answer in answers: - for _, t in self.extract_targets(answer): - results.add(t) + answers, errors = await self.resolve_raw(query, **kwargs) + for answer in answers: + for _, host in self.extract_targets(answer): + results.add(host) except BaseException: log.trace(f"Caught exception in resolve({query}, {kwargs}):") log.trace(traceback.format_exc()) raise self.debug(f"Results for {query} with kwargs={kwargs}: {results}") - if include_errors: - return results, errors - else: - return results + return results async def resolve_raw(self, query, **kwargs): """Resolves the given query to its associated DNS records. 
@@ -168,39 +162,24 @@ async def resolve_raw(self, query, **kwargs): ([('PTR', )], []) >>> await resolve_raw("dns.google") - ([('A', ), ('AAAA', )], []) + (, []) """ # DNS over TCP is more reliable # But setting this breaks DNS resolution on Ubuntu because systemd-resolve doesn't support TCP # kwargs["tcp"] = True - results = [] - errors = [] try: query = str(query).strip() + kwargs.pop("rdtype", None) + rdtype = kwargs.pop("type", "A") if is_ip(query): - kwargs.pop("type", None) - kwargs.pop("rdtype", None) - results, errors = await self._resolve_ip(query, **kwargs) - return [("PTR", results)], [("PTR", e) for e in errors] + return await self._resolve_ip(query, **kwargs) else: - types = ["A", "AAAA"] - kwargs.pop("rdtype", None) - if "type" in kwargs: - t = kwargs.pop("type") - types = self._parse_rdtype(t, default=types) - for t in types: - r, e = await self._resolve_hostname(query, rdtype=t, **kwargs) - if r: - results.append((t, r)) - for error in e: - errors.append((t, error)) + return await self._resolve_hostname(query, rdtype=rdtype, **kwargs) except BaseException: log.trace(f"Caught exception in resolve_raw({query}, {kwargs}):") log.trace(traceback.format_exc()) raise - return (results, errors) - async def _resolve_hostname(self, query, **kwargs): """Translate a hostname into its corresponding IP addresses. 
@@ -483,61 +462,57 @@ async def resolve_event(self, event_host, event_type, minimal=False): types = self.all_rdtypes else: types = ("A", "AAAA") - - if types: - for t in types: - resolved_raw, errors = await self.resolve_raw(event_host, type=t, use_cache=True) - for rdtype, e in errors: - if rdtype not in resolved_raw: - event_tags.add(f"{rdtype.lower()}-error") - for rdtype, records in resolved_raw: - rdtype = str(rdtype).upper() - if records: - event_tags.add("resolved") - event_tags.add(f"{rdtype.lower()}-record") - - for r in records: - for _, t in self.extract_targets(r): - if t: - ip = make_ip_type(t) - - if self.filter_bad_ptrs and rdtype in ("PTR") and is_ptr(t): - self.debug(f"Filtering out bad PTR: {t}") - continue - - try: - dns_children[rdtype].add(ip) - except KeyError: - dns_children[rdtype] = {ip} - - # tag with cloud providers - if not self.in_tests: - to_check = set() - if event_type == "IP_ADDRESS": - to_check.add(event_host) - for rdtype, ips in dns_children.items(): - if rdtype in ("A", "AAAA"): - for ip in ips: - to_check.add(ip) - for ip in to_check: - provider, provider_type, subnet = cloudcheck(ip) - if provider: - event_tags.add(f"{provider_type}-{provider}") - - # if needed, mark as unresolved - if not is_ip(event_host) and "resolved" not in event_tags: - event_tags.add("unresolved") - # check for private IPs + queries = [(event_host, t) for t in types] + async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(queries): + if answers: + rdtype = str(rdtype).upper() + event_tags.add("resolved") + event_tags.add(f"{rdtype.lower()}-record") + + for host, _rdtype in answers: + if host: + host = make_ip_type(host) + + if self.filter_bad_ptrs and rdtype in ("PTR") and is_ptr(host): + self.debug(f"Filtering out bad PTR: {host}") + continue + + try: + dns_children[_rdtype].add(host) + except KeyError: + dns_children[_rdtype] = {host} + + elif errors: + event_tags.add(f"{rdtype.lower()}-error") + + # tag with cloud providers + if not 
self.in_tests: + to_check = set() + if event_type == "IP_ADDRESS": + to_check.add(event_host) for rdtype, ips in dns_children.items(): - for ip in ips: - try: - ip = ipaddress.ip_address(ip) - if ip.is_private: - event_tags.add("private-ip") - except ValueError: - continue - - self._event_cache[event_host] = (event_tags, dns_children) + if rdtype in ("A", "AAAA"): + for ip in ips: + to_check.add(ip) + for ip in to_check: + provider, provider_type, subnet = cloudcheck(ip) + if provider: + event_tags.add(f"{provider_type}-{provider}") + + # if needed, mark as unresolved + if not is_ip(event_host) and "resolved" not in event_tags: + event_tags.add("unresolved") + # check for private IPs + for rdtype, ips in dns_children.items(): + for ip in ips: + try: + ip = ipaddress.ip_address(ip) + if ip.is_private: + event_tags.add("private-ip") + except ValueError: + continue + + self._event_cache[event_host] = (event_tags, dns_children) return event_tags, dns_children @@ -594,12 +569,39 @@ async def resolve_batch(self, queries, **kwargs): """ for q in queries: - yield (q, await self.resolve(q, **kwargs)) + results = await self.resolve(q, **kwargs) + # if results: + yield (q, results) - async def resolve_custom_batch(self, queries): - for query, rdtype in queries: - answers, errors = await self.resolve(query, type=rdtype, include_errors=True) - yield ((query, rdtype), (answers, errors)) + async def resolve_raw_batch(self, queries, threads=10): + tasks = {} + + def new_task(query, rdtype): + task = asyncio.create_task(self.resolve_raw(query, type=rdtype)) + tasks[task] = (query, rdtype) + + queries = list(queries) + for _ in range(threads): # Start initial batch of tasks + if queries: # Ensure there are args to process + new_task(*queries.pop(0)) + + while tasks: # While there are tasks pending + # Wait for the first task to complete + done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED) + + for task in done: + answers, errors = task.result() + query, 
rdtype = tasks.pop(task) + + results = set() + for answer in answers: + for rdtype, host in self.extract_targets(answer): + results.add((host, rdtype)) + # if results or errors: + yield ((query, rdtype), (results, errors)) + + if queries: # Start a new task for each one completed, if URLs remain + new_task(*queries.pop(0)) def extract_targets(self, record): """ @@ -726,9 +728,9 @@ async def is_wildcard(self, query, ips=None, rdtype=None): if ips is None: # then resolve the query for all rdtypes queries = [(query, t) for t in rdtypes_to_check] - async for (query, _rdtype), (answers, errors) in self.resolve_custom_batch(queries): + async for (query, _rdtype), (answers, errors) in self.resolve_raw_batch(queries): if answers: - query_baseline[_rdtype] = answers + query_baseline[_rdtype] = set([a[0] for a in answers]) else: if errors: self.debug(f"Failed to resolve {query} ({_rdtype}) during wildcard detection") @@ -839,22 +841,23 @@ async def is_wildcard_domain(self, domain, log_info=False): # resolve a bunch of random subdomains of the same parent is_wildcard = False wildcard_results = dict() + + queries = [] for rdtype in list(rdtypes_to_check): - # continue if a wildcard was already found for this rdtype - # if rdtype in self._wildcard_cache[host_hash]: - # continue for _ in range(self.wildcard_tests): rand_query = f"{rand_string(digits=False, length=10)}.{host}" - results = await self.resolve(rand_query, type=rdtype, use_cache=False) - if results: - is_wildcard = True - if not rdtype in wildcard_results: - wildcard_results[rdtype] = set() - wildcard_results[rdtype].update(results) - # we know this rdtype is a wildcard - # so we don't need to check it anymore - with suppress(KeyError): - rdtypes_to_check.remove(rdtype) + queries.append((rand_query, rdtype)) + + async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(queries): + if answers: + is_wildcard = True + if not rdtype in wildcard_results: + wildcard_results[rdtype] = set() + 
wildcard_results[rdtype].update(set(a[0] for a in answers)) + # we know this rdtype is a wildcard + # so we don't need to check it anymore + with suppress(KeyError): + rdtypes_to_check.remove(rdtype) self._wildcard_cache.update({host_hash: wildcard_results}) wildcard_domain_results.update({host: wildcard_results}) diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 594192568d..1ff9ace924 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -6,7 +6,7 @@ async def test_dns_engine(bbot_scanner): scan = bbot_scanner() result = await scan.helpers.resolve("one.one.one.one") assert "1.1.1.1" in result - assert "2606:4700:4700::1111" in result + assert not "2606:4700:4700::1111" in result results = [_ async for _ in scan.helpers.resolve_batch(("one.one.one.one", "1.1.1.1"))] pass_1 = False @@ -18,13 +18,14 @@ async def test_dns_engine(bbot_scanner): pass_2 = True assert pass_1 and pass_2 - results = [_ async for _ in scan.helpers.resolve_custom_batch((("one.one.one.one", "A"), ("1.1.1.1", "PTR")))] + results = [_ async for _ in scan.helpers.resolve_raw_batch((("one.one.one.one", "A"), ("1.1.1.1", "PTR")))] pass_1 = False pass_2 = False for (query, rdtype), (result, errors) in results: - if query == "one.one.one.one" and "1.1.1.1" in result: + _results = [r[0] for r in result] + if query == "one.one.one.one" and "1.1.1.1" in _results: pass_1 = True - elif query == "1.1.1.1" and "one.one.one.one" in result: + elif query == "1.1.1.1" and "one.one.one.one" in _results: pass_2 = True assert pass_1 and pass_2 @@ -46,23 +47,22 @@ async def test_dns(bbot_scanner): assert ip_responses[0].response.answer[0][0].target.to_text() in ("one.one.one.one.",) # mid level functions - _responses, errors = await dnsengine.resolve_raw("one.one.one.one") + answers, errors = await dnsengine.resolve_raw("one.one.one.one", type="A") responses = [] - for rdtype, response in _responses: - for answers in response: - responses += 
list(dnsengine.extract_targets(answers)) + for answer in answers: + responses += list(dnsengine.extract_targets(answer)) assert ("A", "1.1.1.1") in responses - _responses, errors = await dnsengine.resolve_raw("one.one.one.one", rdtype="AAAA") + assert not ("AAAA", "2606:4700:4700::1111") in responses + answers, errors = await dnsengine.resolve_raw("one.one.one.one", type="AAAA") responses = [] - for rdtype, response in _responses: - for answers in response: - responses += list(dnsengine.extract_targets(answers)) + for answer in answers: + responses += list(dnsengine.extract_targets(answer)) + assert not ("A", "1.1.1.1") in responses assert ("AAAA", "2606:4700:4700::1111") in responses - _responses, errors = await dnsengine.resolve_raw("1.1.1.1") + answers, errors = await dnsengine.resolve_raw("1.1.1.1") responses = [] - for rdtype, response in _responses: - for answers in response: - responses += list(dnsengine.extract_targets(answers)) + for answer in answers: + responses += list(dnsengine.extract_targets(answer)) assert ("PTR", "one.one.one.one") in responses # high level functions @@ -80,15 +80,11 @@ async def test_dns(bbot_scanner): assert "one.one.one.one" in batch_results["1.1.1.1"] # custom batch resolution - batch_results = [r async for r in dnsengine.resolve_custom_batch([("1.1.1.1", "PTR"), ("one.one.one.one", "A")])] + batch_results = [r async for r in dnsengine.resolve_raw_batch([("1.1.1.1", "PTR"), ("one.one.one.one", "A")])] assert len(batch_results) == 2 batch_results = dict(batch_results) - assert any([x in batch_results[("one.one.one.one", "A")][0] for x in ("1.1.1.1", "1.0.0.1")]) - assert "one.one.one.one" in batch_results[("1.1.1.1", "PTR")][0] - - # "any" type - resolved = await dnsengine.resolve("google.com", type="any") - assert any([scan.helpers.is_subdomain(h) for h in resolved]) + assert ("1.1.1.1", "A") in batch_results[("one.one.one.one", "A")][0] + assert ("one.one.one.one", "PTR") in batch_results[("1.1.1.1", "PTR")][0] # dns cache 
dnsengine._dns_cache.clear() @@ -103,9 +99,13 @@ async def test_dns(bbot_scanner): await dnsengine.resolve("1.1.1.1") assert hash(f"1.1.1.1:PTR") in dnsengine._dns_cache - await dnsengine.resolve("one.one.one.one") + await dnsengine.resolve("one.one.one.one", type="A") assert hash(f"one.one.one.one:A") in dnsengine._dns_cache + assert not hash(f"one.one.one.one:AAAA") in dnsengine._dns_cache + dnsengine._dns_cache.clear() + await dnsengine.resolve("one.one.one.one", type="AAAA") assert hash(f"one.one.one.one:AAAA") in dnsengine._dns_cache + assert not hash(f"one.one.one.one:A") in dnsengine._dns_cache # Ensure events with hosts have resolved_hosts attribute populated resolved_hosts_event1 = scan.make_event("one.one.one.one", "DNS_NAME", dummy=True) diff --git a/bbot/test/test_step_2/module_tests/test_module_web_report.py b/bbot/test/test_step_2/module_tests/test_module_web_report.py index a37c178e23..c34eef00f2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_web_report.py +++ b/bbot/test/test_step_2/module_tests/test_module_web_report.py @@ -13,8 +13,6 @@ async def setup_before_prep(self, module_test): module_test.set_expect_requests(respond_args=respond_args) def check(self, module_test, events): - for e in events: - module_test.log.critical(e) report_file = module_test.scan.home / "web_report.html" with open(report_file) as f: report_content = f.read() From f257fe626c78ba2a64fc2bd30f94a363a3dc34bf Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 20:37:56 -0400 Subject: [PATCH 12/63] performance improvements --- bbot/core/helpers/dns/engine.py | 35 +++++++++++++++++++++++++-------- bbot/modules/base.py | 6 ++++-- bbot/scanner/scanner.py | 2 +- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 7104a732e6..c10c2c8a2f 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -548,7 +548,7 @@ def event_cache_get(self, host): 
except KeyError: return set(), set() - async def resolve_batch(self, queries, **kwargs): + async def resolve_batch(self, queries, threads=10, **kwargs): """ A helper to execute a bunch of DNS requests. @@ -566,12 +566,31 @@ async def resolve_batch(self, queries, **kwargs): ... print(result) ('www.evilcorp.com', {'1.1.1.1'}) ('evilcorp.com', {'2.2.2.2'}) - """ - for q in queries: - results = await self.resolve(q, **kwargs) - # if results: - yield (q, results) + tasks = {} + + def new_task(query): + task = asyncio.create_task(self.resolve(query, **kwargs)) + tasks[task] = query + + queries = list(queries) + for _ in range(threads): # Start initial batch of tasks + if queries: # Ensure there are args to process + new_task(queries.pop(0)) + + while tasks: # While there are tasks pending + # Wait for the first task to complete + done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED) + + for task in done: + results = task.result() + query = tasks.pop(task) + + if results: + yield (query, results) + + if queries: # Start a new task for each one completed, if URLs remain + new_task(queries.pop(0)) async def resolve_raw_batch(self, queries, threads=10): tasks = {} @@ -597,8 +616,8 @@ def new_task(query, rdtype): for answer in answers: for rdtype, host in self.extract_targets(answer): results.add((host, rdtype)) - # if results or errors: - yield ((query, rdtype), (results, errors)) + if results or errors: + yield ((query, rdtype), (results, errors)) if queries: # Start a new task for each one completed, if URLs remain new_task(*queries.pop(0)) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 093fa96803..f81f0f81a6 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -474,7 +474,7 @@ async def emit_event(self, *args, **kwargs): if event: await self.queue_outgoing_event(event, **emit_kwargs) - async def _events_waiting(self): + async def _events_waiting(self, batch_size=None): """ Asynchronously fetches events from the 
incoming_event_queue, up to a specified batch size. @@ -492,10 +492,12 @@ async def _events_waiting(self): - "FINISHED" events are handled differently and the finish flag is set to True. - If the queue is empty or the batch size is reached, the loop breaks. """ + if batch_size is None: + batch_size = self.batch_size events = [] finish = False while self.incoming_event_queue: - if len(events) > self.batch_size: + if batch_size != -1 and len(events) > self.batch_size: break try: event = self.incoming_event_queue.get_nowait() diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index a849ae6e7e..bd17cc089c 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -332,7 +332,7 @@ async def async_start(self): break if "python" in self.modules: - events, finish = await self.modules["python"]._events_waiting() + events, finish = await self.modules["python"]._events_waiting(batch_size=-1) for e in events: yield e From 9a29253438b6d5a4659d7c87528a0de7020e3418 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 2 Apr 2024 21:19:24 -0400 Subject: [PATCH 13/63] fix massdns tests --- bbot/modules/massdns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index e453ee8d45..a99487ed17 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -233,7 +233,7 @@ async def resolve_and_emit(self): results, source_event, tags = await self.resolve_and_emit_queue.get() self.verbose(f"Resolving batch of {len(results):,} results") async with self._task_counter.count(f"{self.name}.resolve_and_emit()"): - async for hostname, r in self.helpers.resolve_batch(results, type=("A", "CNAME")): + async for hostname, r in self.helpers.resolve_batch(results, type="A"): if not r: self.debug(f"Discarding {hostname} because it didn't resolve") continue From 35ecd6c70d4306db5adbe4f670a2069bf53d00f6 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 4 Apr 2024 10:33:32 -0400 Subject: [PATCH 
14/63] faster event dns caching --- bbot/core/helpers/dns/dns.py | 68 +++++++++++----- bbot/core/helpers/dns/engine.py | 129 +++++++++++++----------------- bbot/test/test_step_1/test_dns.py | 4 + 3 files changed, 108 insertions(+), 93 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 432d0c0303..4f25e62fdf 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -2,10 +2,12 @@ import logging import dns.exception import dns.asyncresolver +from cachetools import LRUCache from contextlib import suppress from bbot.core.engine import EngineClient from bbot.core.errors import ValidationError +from bbot.core.helpers.async_helpers import NamedLock from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host from .engine import DNSEngine @@ -70,6 +72,10 @@ def __init__(self, parent_helper): self.wildcard_ignore = [] self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore]) + # event resolution cache + self._event_cache = LRUCache(maxsize=10000) + self._event_cache_locks = NamedLock() + # copy the system's current resolvers to a text file for tool use self.system_resolvers = dns.resolver.Resolver().nameservers # TODO: DNS server speed test (start in background task) @@ -94,29 +100,51 @@ async def resolve_event(self, event, minimal=False): event_host = str(event.host) event_type = str(event.type) - kwargs = {"event_host": event_host, "event_type": event_type, "minimal": minimal} - event_tags, dns_children = await self.run_and_return("resolve_event", **kwargs) - - # whitelisting / blacklisting based on resolved hosts + event_tags = set() + dns_children = dict() event_whitelisted = False event_blacklisted = False - for rdtype, children in dns_children.items(): - if event_blacklisted: - break - for host in children: - if rdtype in ("A", "AAAA", "CNAME"): - # having a CNAME to an in-scope resource doesn't make you in-scope - if not event_whitelisted and rdtype != 
"CNAME": + + if (not event.host) or (event.type in ("IP_RANGE",)): + return event_tags, event_whitelisted, event_blacklisted, dns_children + + # lock to ensure resolution of the same host doesn't start while we're working here + async with self._event_cache_locks.lock(event_host): + # try to get data from cache + try: + _event_tags, _event_whitelisted, _event_blacklisted, _dns_children = self._event_cache[event_host] + event_tags.update(_event_tags) + # if we found it, return it + if _event_whitelisted is not None: + return event_tags, _event_whitelisted, _event_blacklisted, _dns_children + except KeyError: + pass + + kwargs = {"event_host": event_host, "event_type": event_type, "minimal": minimal} + event_tags, dns_children = await self.run_and_return("resolve_event", **kwargs) + + # whitelisting / blacklisting based on resolved hosts + event_whitelisted = False + event_blacklisted = False + for rdtype, children in dns_children.items(): + if event_blacklisted: + break + for host in children: + if rdtype in ("A", "AAAA", "CNAME"): + # having a CNAME to an in-scope resource doesn't make you in-scope + if not event_whitelisted and rdtype != "CNAME": + with suppress(ValidationError): + if self.parent_helper.scan.whitelisted(host): + event_whitelisted = True + # CNAME to a blacklisted resources, means you're blacklisted with suppress(ValidationError): - if self.parent_helper.scan.whitelisted(host): - event_whitelisted = True - # CNAME to a blacklisted resources, means you're blacklisted - with suppress(ValidationError): - if self.parent_helper.scan.blacklisted(host): - event_blacklisted = True - break - - return event_tags, event_whitelisted, event_blacklisted, dns_children + if self.parent_helper.scan.blacklisted(host): + event_blacklisted = True + break + + self._event_cache[event_host] = (event_tags, event_whitelisted, event_blacklisted, dns_children) + + return event_tags, event_whitelisted, event_blacklisted, dns_children async def is_wildcard(self, query, 
ips=None, rdtype=None): """ diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index c10c2c8a2f..1031cef1ff 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -96,8 +96,6 @@ def __init__(self, socket_path, config={}): self._errors = dict() self._debug = self.config.get("dns_debug", False) self._dns_cache = LRUCache(maxsize=10000) - self._event_cache = LRUCache(maxsize=10000) - self._event_cache_locks = NamedLock() self.filter_bad_ptrs = self.config.get("dns_filter_ptrs", True) @@ -440,79 +438,64 @@ async def resolve_event(self, event_host, event_type, minimal=False): dns_children = dict() try: - # lock to ensure resolution of the same host doesn't start while we're working here - async with self._event_cache_locks.lock(event_host): - # try to get data from cache - try: - _event_tags, _dns_children = self._event_cache[event_host] - event_tags.update(_event_tags) - # if we found it, return it - if _event_tags is not None: - return event_tags, _dns_children - except KeyError: - _event_tags, _dns_children = set(), set() - - # then resolve - types = () - if is_ip(event_host): - if not minimal: - types = ("PTR",) + types = () + if is_ip(event_host): + if not minimal: + types = ("PTR",) + else: + if event_type == "DNS_NAME" and not minimal: + types = self.all_rdtypes else: - if event_type == "DNS_NAME" and not minimal: - types = self.all_rdtypes - else: - types = ("A", "AAAA") - queries = [(event_host, t) for t in types] - async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(queries): - if answers: - rdtype = str(rdtype).upper() - event_tags.add("resolved") - event_tags.add(f"{rdtype.lower()}-record") - - for host, _rdtype in answers: - if host: - host = make_ip_type(host) - - if self.filter_bad_ptrs and rdtype in ("PTR") and is_ptr(host): - self.debug(f"Filtering out bad PTR: {host}") - continue - - try: - dns_children[_rdtype].add(host) - except KeyError: - dns_children[_rdtype] = {host} - 
- elif errors: - event_tags.add(f"{rdtype.lower()}-error") - - # tag with cloud providers - if not self.in_tests: - to_check = set() - if event_type == "IP_ADDRESS": - to_check.add(event_host) - for rdtype, ips in dns_children.items(): - if rdtype in ("A", "AAAA"): - for ip in ips: - to_check.add(ip) - for ip in to_check: - provider, provider_type, subnet = cloudcheck(ip) - if provider: - event_tags.add(f"{provider_type}-{provider}") - - # if needed, mark as unresolved - if not is_ip(event_host) and "resolved" not in event_tags: - event_tags.add("unresolved") - # check for private IPs + types = ("A", "AAAA") + queries = [(event_host, t) for t in types] + async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(queries): + if answers: + rdtype = str(rdtype).upper() + event_tags.add("resolved") + event_tags.add(f"{rdtype.lower()}-record") + + for host, _rdtype in answers: + if host: + host = make_ip_type(host) + + if self.filter_bad_ptrs and rdtype in ("PTR") and is_ptr(host): + self.debug(f"Filtering out bad PTR: {host}") + continue + + try: + dns_children[_rdtype].add(host) + except KeyError: + dns_children[_rdtype] = {host} + + elif errors: + event_tags.add(f"{rdtype.lower()}-error") + + # tag with cloud providers + if not self.in_tests: + to_check = set() + if event_type == "IP_ADDRESS": + to_check.add(event_host) for rdtype, ips in dns_children.items(): - for ip in ips: - try: - ip = ipaddress.ip_address(ip) - if ip.is_private: - event_tags.add("private-ip") - except ValueError: - continue - - self._event_cache[event_host] = (event_tags, dns_children) + if rdtype in ("A", "AAAA"): + for ip in ips: + to_check.add(ip) + for ip in to_check: + provider, provider_type, subnet = cloudcheck(ip) + if provider: + event_tags.add(f"{provider_type}-{provider}") + + # if needed, mark as unresolved + if not is_ip(event_host) and "resolved" not in event_tags: + event_tags.add("unresolved") + # check for private IPs + for rdtype, ips in dns_children.items(): + for 
ip in ips: + try: + ip = ipaddress.ip_address(ip) + if ip.is_private: + event_tags.add("private-ip") + except ValueError: + continue return event_tags, dns_children diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 1ff9ace924..07beca1f25 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -110,9 +110,13 @@ async def test_dns(bbot_scanner): # Ensure events with hosts have resolved_hosts attribute populated resolved_hosts_event1 = scan.make_event("one.one.one.one", "DNS_NAME", dummy=True) resolved_hosts_event2 = scan.make_event("http://one.one.one.one/", "URL_UNVERIFIED", dummy=True) + assert resolved_hosts_event1.host not in scan.helpers.dns._event_cache + assert resolved_hosts_event2.host not in scan.helpers.dns._event_cache event_tags1, event_whitelisted1, event_blacklisted1, children1 = await scan.helpers.resolve_event( resolved_hosts_event1 ) + assert resolved_hosts_event1.host in scan.helpers.dns._event_cache + assert resolved_hosts_event2.host in scan.helpers.dns._event_cache event_tags2, event_whitelisted2, event_blacklisted2, children2 = await scan.helpers.resolve_event( resolved_hosts_event2 ) From 19329cbf42d097b87ffcac7d65f8d57bc8e1389d Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 4 Apr 2024 10:56:58 -0400 Subject: [PATCH 15/63] remove obsolete massdns logic --- bbot/cli.py | 3 +- bbot/core/config/files.py | 2 +- bbot/core/config/logger.py | 2 +- bbot/core/helpers/dns/dns.py | 2 +- bbot/core/helpers/dns/engine.py | 2 +- bbot/core/helpers/logger.py | 52 ---------------------- bbot/core/modules.py | 2 +- bbot/modules/massdns.py | 76 ++++++++++----------------------- bbot/modules/output/human.py | 2 +- 9 files changed, 30 insertions(+), 113 deletions(-) delete mode 100644 bbot/core/helpers/logger.py diff --git a/bbot/cli.py b/bbot/cli.py index d12616b929..b3ac3f4304 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -3,6 +3,7 @@ import sys from bbot.errors import * from bbot 
import __version__ +from bbot.logger import log_to_stderr silent = "-s" in sys.argv or "--silent" in sys.argv @@ -32,8 +33,6 @@ async def _main(): # fix tee buffering sys.stdout.reconfigure(line_buffering=True) - from bbot.core.helpers.logger import log_to_stderr - log = logging.getLogger("bbot.cli") from bbot.scanner import Scanner diff --git a/bbot/core/config/files.py b/bbot/core/config/files.py index 80704cdbd0..1df185c154 100644 --- a/bbot/core/config/files.py +++ b/bbot/core/config/files.py @@ -4,7 +4,7 @@ from ..helpers.misc import mkdir from ...errors import ConfigLoadError -from ..helpers.logger import log_to_stderr +from ...logger import log_to_stderr bbot_code_dir = Path(__file__).parent.parent.parent diff --git a/bbot/core/config/logger.py b/bbot/core/config/logger.py index 40ea3c96fd..b6aec39aa3 100644 --- a/bbot/core/config/logger.py +++ b/bbot/core/config/logger.py @@ -7,7 +7,7 @@ from pathlib import Path from ..helpers.misc import mkdir, error_and_exit -from ..helpers.logger import colorize, loglevel_mapping +from ...logger import colorize, loglevel_mapping debug_format = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s %(filename)s:%(lineno)s %(message)s") diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 4f25e62fdf..58e00a9b15 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -5,8 +5,8 @@ from cachetools import LRUCache from contextlib import suppress +from bbot.errors import ValidationError from bbot.core.engine import EngineClient -from bbot.core.errors import ValidationError from bbot.core.helpers.async_helpers import NamedLock from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 1031cef1ff..2b5903292b 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -9,8 +9,8 @@ from cachetools import LRUCache from ..regexes import dns_name_regex 
+from bbot.errors import DNSWildcardBreak from bbot.core.engine import EngineServer -from bbot.core.errors import DNSWildcardBreak from bbot.core.helpers.async_helpers import NamedLock from bbot.core.helpers.misc import ( clean_dns_record, diff --git a/bbot/core/helpers/logger.py b/bbot/core/helpers/logger.py deleted file mode 100644 index b70d4b4b41..0000000000 --- a/bbot/core/helpers/logger.py +++ /dev/null @@ -1,52 +0,0 @@ -import sys - -loglevel_mapping = { - "DEBUG": "DBUG", - "TRACE": "TRCE", - "VERBOSE": "VERB", - "HUGEVERBOSE": "VERB", - "INFO": "INFO", - "HUGEINFO": "INFO", - "SUCCESS": "SUCC", - "HUGESUCCESS": "SUCC", - "WARNING": "WARN", - "HUGEWARNING": "WARN", - "ERROR": "ERRR", - "CRITICAL": "CRIT", -} -color_mapping = { - "DEBUG": 242, # grey - "TRACE": 242, # red - "VERBOSE": 242, # grey - "INFO": 69, # blue - "HUGEINFO": 69, # blue - "SUCCESS": 118, # green - "HUGESUCCESS": 118, # green - "WARNING": 208, # orange - "HUGEWARNING": 208, # orange - "ERROR": 196, # red - "CRITICAL": 196, # red -} -color_prefix = "\033[1;38;5;" -color_suffix = "\033[0m" - - -def colorize(s, level="INFO"): - seq = color_mapping.get(level, 15) # default white - colored = f"{color_prefix}{seq}m{s}{color_suffix}" - return colored - - -def log_to_stderr(msg, level="INFO", logname=True): - """ - Print to stderr with BBOT logger colors - """ - levelname = level.upper() - if not any(x in sys.argv for x in ("-s", "--silent")): - levelshort = f"[{loglevel_mapping.get(level, 'INFO')}]" - levelshort = f"{colorize(levelshort, level=levelname)}" - if levelname == "CRITICAL" or levelname.startswith("HUGE"): - msg = colorize(msg, level=levelname) - if logname: - msg = f"{levelshort} {msg}" - print(msg, file=sys.stderr) diff --git a/bbot/core/modules.py b/bbot/core/modules.py index 62929230ca..2f3ce445c9 100644 --- a/bbot/core/modules.py +++ b/bbot/core/modules.py @@ -13,9 +13,9 @@ from contextlib import suppress from bbot.core import CORE +from bbot.logger import log_to_stderr from 
.flags import flag_descriptions -from .helpers.logger import log_to_stderr from .helpers.misc import list_files, sha1, search_dict_by_key, search_format_dict, make_table, os_platform, mkdir diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index a99487ed17..cad536dfdb 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -1,7 +1,6 @@ import re import json import random -import asyncio import subprocess from bbot.modules.templates.subdomain_enum import subdomain_enum @@ -101,10 +100,8 @@ async def setup(self): cache_hrs=24 * 7, ) self.devops_mutations = list(self.helpers.word_cloud.devops_mutations) - self.mutation_run = 1 + self._mutation_run = 1 - self.resolve_and_emit_queue = asyncio.Queue() - self.resolve_and_emit_task = asyncio.create_task(self.resolve_and_emit()) return await super().setup() async def filter_event(self, event): @@ -137,8 +134,8 @@ async def handle_event(self, event): query = self.make_query(event) self.source_events.add_target(event) self.info(f"Brute-forcing subdomains for {query} (source: {event.data})") - results = await self.massdns(query, self.subdomain_list) - await self.resolve_and_emit_queue.put((results, event, None)) + for hostname in await self.massdns(query, self.subdomain_list): + await self.emit_result(hostname, event, query) def abort_if(self, event): if not event.scope_distance == 0: @@ -149,6 +146,13 @@ def abort_if(self, event): return True, "event is unresolved" return False, "" + async def emit_result(self, result, source_event, query, tags=None): + if not result == source_event: + kwargs = {"abort_if": self.abort_if} + if tags is not None: + kwargs["tags"] = tags + await self.emit_event(result, "DNS_NAME", source_event, **kwargs) + def already_processed(self, hostname): if hash(hostname) in self.processed: return True @@ -221,35 +225,6 @@ async def massdns(self, domain, subdomains): # everything checks out return results - async def resolve_and_emit(self): - """ - When results are found, they 
are placed into self.resolve_and_emit_queue. - The purpose of this function (which is started as a task in the module's setup()) is to consume results from - the queue, resolve them, and if they resolve, emit them. - - This exists to prevent disrupting the scan with huge batches of DNS resolutions. - """ - while 1: - results, source_event, tags = await self.resolve_and_emit_queue.get() - self.verbose(f"Resolving batch of {len(results):,} results") - async with self._task_counter.count(f"{self.name}.resolve_and_emit()"): - async for hostname, r in self.helpers.resolve_batch(results, type="A"): - if not r: - self.debug(f"Discarding {hostname} because it didn't resolve") - continue - self.add_found(hostname) - if source_event is None: - source_event = self.source_events.get(hostname) - if source_event is None: - self.warning(f"Could not correlate source event from: {hostname}") - source_event = self.scan.root_event - kwargs = {"abort_if": self.abort_if, "tags": tags} - await self.emit_event(hostname, "DNS_NAME", source_event, **kwargs) - - @property - def running(self): - return super().running or self.resolve_and_emit_queue.qsize() > 0 - async def _canary_check(self, domain, num_checks=50): random_subdomains = list(self.gen_random_subdomains(num_checks)) self.verbose(f"Testing {len(random_subdomains):,} canaries against {domain}") @@ -378,9 +353,6 @@ def add_mutation(_domain_hash, m): self.mutations_tried.add(h) mutations.add(m) - num_base_mutations = len(base_mutations) - self.debug(f"Base mutations for {domain}: {num_base_mutations:,}") - # try every subdomain everywhere else for _domain, _subdomains in found: if _domain == domain: @@ -388,7 +360,10 @@ def add_mutation(_domain_hash, m): for s in _subdomains: first_segment = s.split(".")[0] # skip stuff with lots of numbers (e.g. 
PTRs) - if self.has_excessive_digits(first_segment): + digits = self.digit_regex.findall(first_segment) + excessive_digits = len(digits) > 2 + long_digits = any(len(d) > 3 for d in digits) + if excessive_digits or long_digits: continue add_mutation(domain_hash, first_segment) for word in self.helpers.extract_words( @@ -396,9 +371,6 @@ def add_mutation(_domain_hash, m): ): add_mutation(domain_hash, word) - num_massdns_mutations = len(mutations) - num_base_mutations - self.debug(f"Mutations from previous subdomains for {domain}: {num_massdns_mutations:,}") - # numbers + devops mutations for mutation in self.helpers.word_cloud.mutations( subdomains, cloud=False, numbers=3, number_padding=1 @@ -407,26 +379,24 @@ def add_mutation(_domain_hash, m): m = delimiter.join(mutation).lower() add_mutation(domain_hash, m) - num_word_cloud_mutations = len(mutations) - num_massdns_mutations - self.debug(f"Mutations added by word cloud for {domain}: {num_word_cloud_mutations:,}") - # special dns mutator - self.debug( - f"DNS Mutator size: {len(self.helpers.word_cloud.dns_mutator):,} (limited to {self.max_mutations:,})" - ) for subdomain in self.helpers.word_cloud.dns_mutator.mutations( subdomains, max_mutations=self.max_mutations ): add_mutation(domain_hash, subdomain) - num_mutations = len(mutations) - num_word_cloud_mutations - self.debug(f"Mutations added by DNS Mutator: {num_mutations:,}") - if mutations: self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(found)})") results = list(await self.massdns(query, mutations)) + for hostname in results: + source_event = self.source_events.get(hostname) + if source_event is None: + self.warning(f"Could not correlate source event from: {hostname}") + source_event = self.scan.root_event + await self.emit_result( + hostname, source_event, query, tags=[f"mutation-{self._mutation_run}"] + ) if results: - await self.resolve_and_emit_queue.put((results, None, [f"mutation-{self.mutation_run}"])) found_mutations = True 
continue break @@ -434,7 +404,7 @@ def add_mutation(_domain_hash, m): self.warning(e) if found_mutations: - self.mutation_run += 1 + self._mutation_run += 1 def add_found(self, host): if not isinstance(host, str): diff --git a/bbot/modules/output/human.py b/bbot/modules/output/human.py index e1f4746c42..389a4bd84a 100644 --- a/bbot/modules/output/human.py +++ b/bbot/modules/output/human.py @@ -1,6 +1,6 @@ from contextlib import suppress -from bbot.core.helpers.logger import log_to_stderr +from bbot.logger import log_to_stderr from bbot.modules.output.base import BaseOutputModule From b54b567cb8c89e297b97da23036d09b43a403d32 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 4 Apr 2024 10:57:09 -0400 Subject: [PATCH 16/63] add logger.py --- bbot/logger.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 bbot/logger.py diff --git a/bbot/logger.py b/bbot/logger.py new file mode 100644 index 0000000000..b70d4b4b41 --- /dev/null +++ b/bbot/logger.py @@ -0,0 +1,52 @@ +import sys + +loglevel_mapping = { + "DEBUG": "DBUG", + "TRACE": "TRCE", + "VERBOSE": "VERB", + "HUGEVERBOSE": "VERB", + "INFO": "INFO", + "HUGEINFO": "INFO", + "SUCCESS": "SUCC", + "HUGESUCCESS": "SUCC", + "WARNING": "WARN", + "HUGEWARNING": "WARN", + "ERROR": "ERRR", + "CRITICAL": "CRIT", +} +color_mapping = { + "DEBUG": 242, # grey + "TRACE": 242, # red + "VERBOSE": 242, # grey + "INFO": 69, # blue + "HUGEINFO": 69, # blue + "SUCCESS": 118, # green + "HUGESUCCESS": 118, # green + "WARNING": 208, # orange + "HUGEWARNING": 208, # orange + "ERROR": 196, # red + "CRITICAL": 196, # red +} +color_prefix = "\033[1;38;5;" +color_suffix = "\033[0m" + + +def colorize(s, level="INFO"): + seq = color_mapping.get(level, 15) # default white + colored = f"{color_prefix}{seq}m{s}{color_suffix}" + return colored + + +def log_to_stderr(msg, level="INFO", logname=True): + """ + Print to stderr with BBOT logger colors + """ + levelname = level.upper() + if 
not any(x in sys.argv for x in ("-s", "--silent")): + levelshort = f"[{loglevel_mapping.get(level, 'INFO')}]" + levelshort = f"{colorize(levelshort, level=levelname)}" + if levelname == "CRITICAL" or levelname.startswith("HUGE"): + msg = colorize(msg, level=levelname) + if logname: + msg = f"{levelshort} {msg}" + print(msg, file=sys.stderr) From 60049ac40fdb88806652f17d617c70543fc52628 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sun, 7 Apr 2024 04:55:04 -0400 Subject: [PATCH 17/63] make sure zmq sockets are always cleaned up at the end of a scan --- bbot/core/engine.py | 7 +++++-- bbot/scanner/scanner.py | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/bbot/core/engine.py b/bbot/core/engine.py index 3badd4d420..06f965c456 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -33,6 +33,7 @@ def __init__(self, **kwargs): self.server_kwargs = kwargs.pop("server_kwargs", {}) self.server_process = self.start_server() self.context = zmq.asyncio.Context() + atexit.register(self.cleanup) async def run_and_return(self, command, **kwargs): with self.new_socket() as socket: @@ -111,6 +112,10 @@ def new_socket(self): finally: socket.close() + def cleanup(self): + # delete socket file on exit + self.socket_path.unlink(missing_ok=True) + class EngineServer: @@ -126,8 +131,6 @@ def __init__(self, socket_path): self.socket = self.context.socket(zmq.ROUTER) # create socket file self.socket.bind(f"ipc://{socket_path}") - # delete socket file on exit - atexit.register(socket_path.unlink, missing_ok=True) async def run_and_return(self, client_id, command_fn, **kwargs): self.log.debug(f"{self.name} run-and-return {command_fn.__name__}({kwargs})") diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 6ce949cb25..c7aaf8f447 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -644,8 +644,12 @@ async def _cleanup(self): None """ self.status = "CLEANING_UP" + # clean up dns engine + self.helpers.dns.cleanup() + # clean 
up modules for mod in self.modules.values(): await mod._cleanup() + # clean up self if not self._cleanedup: self._cleanedup = True with contextlib.suppress(Exception): From b2121a200b50e71e97a401f683c4d95bfe246bef Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 8 Apr 2024 15:11:43 -0400 Subject: [PATCH 18/63] WIP module hooks --- README.md | 2 +- bbot/core/event/base.py | 4 +- bbot/core/helpers/dns/dns.py | 44 ++---- bbot/modules/base.py | 93 +++++++++++- bbot/modules/httpx.py | 2 - bbot/modules/internal/dnsresolve.py | 75 ++++++++++ bbot/scanner/manager.py | 142 ++++++++----------- bbot/scanner/scanner.py | 38 +++-- poetry.lock | 213 +++++++++++++++++++++++++++- pyproject.toml | 1 + 10 files changed, 474 insertions(+), 140 deletions(-) create mode 100644 bbot/modules/internal/dnsresolve.py diff --git a/README.md b/README.md index 1a6f79d9db..1ac582caea 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![bbot_banner](https://user-images.githubusercontent.com/20261699/158000235-6c1ace81-a267-4f8e-90a1-f4c16884ebac.png)](https://github.com/blacklanternsecurity/bbot) -#### BBOT /ˈBEE·bot/ (noun): A recursive internet scanner for hackers. +#### /ˈBEE·bot/ (noun): A recursive internet scanner for hackers. 
[![Python Version](https://img.shields.io/badge/python-3.9+-FF8400)](https://www.python.org) [![License](https://img.shields.io/badge/license-GPLv3-FF8400.svg)](https://github.com/blacklanternsecurity/bbot/blob/dev/LICENSE) [![DEF CON Demo Labs 2023](https://img.shields.io/badge/DEF%20CON%20Demo%20Labs-2023-FF8400.svg)](https://forum.defcon.org/node/246338) [![PyPi Downloads](https://static.pepy.tech/personalized-badge/bbot?right_color=orange&left_color=grey)](https://pepy.tech/project/bbot) [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Tests](https://github.com/blacklanternsecurity/bbot/actions/workflows/tests.yml/badge.svg?branch=stable)](https://github.com/blacklanternsecurity/bbot/actions?query=workflow%3A"tests") [![Codecov](https://codecov.io/gh/blacklanternsecurity/bbot/branch/dev/graph/badge.svg?token=IR5AZBDM5K)](https://codecov.io/gh/blacklanternsecurity/bbot) [![Discord](https://img.shields.io/discord/859164869970362439)](https://discord.com/invite/PZqkgxu5SA) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index cd6533dd40..bf030ebc28 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -154,6 +154,7 @@ def __init__( self._priority = None self._module_priority = None self._resolved_hosts = set() + self.dns_children = dict() # keep track of whether this event has been recorded by the scan self._stats_recorded = False @@ -210,9 +211,6 @@ def __init__( if _internal: # or source._internal: self.internal = True - # an event indicating whether the event has undergone DNS resolution - self._resolved = asyncio.Event() - # inherit web spider distance from parent self.web_spider_distance = getattr(self.source, "web_spider_distance", 0) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 58e00a9b15..f9c505540b 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -102,8 +102,6 @@ async def resolve_event(self, event, 
minimal=False): event_type = str(event.type) event_tags = set() dns_children = dict() - event_whitelisted = False - event_blacklisted = False if (not event.host) or (event.type in ("IP_RANGE",)): return event_tags, event_whitelisted, event_blacklisted, dns_children @@ -112,39 +110,20 @@ async def resolve_event(self, event, minimal=False): async with self._event_cache_locks.lock(event_host): # try to get data from cache try: - _event_tags, _event_whitelisted, _event_blacklisted, _dns_children = self._event_cache[event_host] + _event_tags, _dns_children = self._event_cache[event_host] event_tags.update(_event_tags) # if we found it, return it if _event_whitelisted is not None: - return event_tags, _event_whitelisted, _event_blacklisted, _dns_children + return event_tags, _dns_children except KeyError: pass kwargs = {"event_host": event_host, "event_type": event_type, "minimal": minimal} event_tags, dns_children = await self.run_and_return("resolve_event", **kwargs) - # whitelisting / blacklisting based on resolved hosts - event_whitelisted = False - event_blacklisted = False - for rdtype, children in dns_children.items(): - if event_blacklisted: - break - for host in children: - if rdtype in ("A", "AAAA", "CNAME"): - # having a CNAME to an in-scope resource doesn't make you in-scope - if not event_whitelisted and rdtype != "CNAME": - with suppress(ValidationError): - if self.parent_helper.scan.whitelisted(host): - event_whitelisted = True - # CNAME to a blacklisted resources, means you're blacklisted - with suppress(ValidationError): - if self.parent_helper.scan.blacklisted(host): - event_blacklisted = True - break - - self._event_cache[event_host] = (event_tags, event_whitelisted, event_blacklisted, dns_children) + self._event_cache[event_host] = (event_tags, dns_children) - return event_tags, event_whitelisted, event_blacklisted, dns_children + return event_tags, dns_children async def is_wildcard(self, query, ips=None, rdtype=None): """ @@ -204,7 +183,7 @@ async 
def is_wildcard(self, query, ips=None, rdtype=None): async def is_wildcard_domain(self, domain, log_info=False): return await self.run_and_return("is_wildcard_domain", domain=domain, log_info=False) - async def handle_wildcard_event(self, event, children): + async def handle_wildcard_event(self, event): """ Used within BBOT's scan manager to detect and tag DNS wildcard events. @@ -212,19 +191,18 @@ async def handle_wildcard_event(self, event, children): is overwritten, for example: `_wildcard.evilcorp.com`. Args: - event (object): The event to check for wildcards. - children (list): A list of the event's resulting DNS children after resolution. + event (Event): The event to check for wildcards. Returns: None: This method modifies the `event` in place and does not return a value. Examples: - >>> handle_wildcard_event(event, children) + >>> handle_wildcard_event(event) # The `event` might now have tags like ["wildcard", "a-wildcard", "aaaa-wildcard"] and # its `data` attribute might be modified to "_wildcard.evilcorp.com" if it was detected # as a wildcard. 
""" - log.debug(f"Entering handle_wildcard_event({event}, children={children})") + log.debug(f"Entering handle_wildcard_event({event}, children={event.dns_children})") try: event_host = str(event.host) # wildcard checks @@ -239,10 +217,10 @@ async def handle_wildcard_event(self, event, children): event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) - if not is_ip(event.host) and children: + if (not is_ip(event.host)) and event.dns_children: if wildcard_rdtypes: # these are the rdtypes that successfully resolve - resolved_rdtypes = set([c.upper() for c in children]) + resolved_rdtypes = set([c.upper() for c in event.dns_children]) # these are the rdtypes that have wildcards wildcard_rdtypes_set = set(wildcard_rdtypes) # consider the event a full wildcard if all its records are wildcards @@ -276,7 +254,7 @@ async def handle_wildcard_event(self, event, children): # event.add_tag(f"{rdtype.lower()}-wildcard-domain") finally: - log.debug(f"Finished handle_wildcard_event({event}, children={children})") + log.debug(f"Finished handle_wildcard_event({event}, children={event.dns_children})") async def _mock_dns(self, mock_data): from .mock import MockResolver diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 1536107aed..a502037597 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -111,6 +111,7 @@ class BaseModule: _priority = 3 _name = "base" _type = "scan" + _hook = False def __init__(self, scan): """Initializes a module instance. @@ -590,7 +591,7 @@ async def _worker(self): - Each event is subject to a post-check via '_event_postcheck()' to decide whether it should be handled. - Special 'FINISHED' events trigger the 'finish()' method of the module. 
""" - async with self.scan._acatch(context=self._worker): + async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): try: while not self.scan.stopping and not self.errored: # hold the reigns if our outgoing queue is full @@ -691,7 +692,10 @@ async def _event_postcheck(self, event): """ A simple wrapper for dup tracking """ - acceptable, reason = await self.__event_postcheck(event) + # special exception for "FINISHED" event + if event.type in ("FINISHED",): + return True, "" + acceptable, reason = await self._event_postcheck_inner(event) if acceptable: # check duplicates is_incoming_duplicate, reason = self.is_incoming_duplicate(event, add=True) @@ -700,7 +704,7 @@ async def _event_postcheck(self, event): return acceptable, reason - async def __event_postcheck(self, event): + async def _event_postcheck_inner(self, event): """ Post-checks an event to determine if it should be accepted by the module for handling. @@ -718,10 +722,6 @@ async def __event_postcheck(self, event): - This method also maintains host-based tracking when the `per_host_only` or similar flags are set. - The method will also update event production stats for output modules. 
""" - # special exception for "FINISHED" event - if event.type in ("FINISHED",): - return True, "" - # force-output certain events to the graph if self._is_graph_important(event): return True, "event is critical to the graph" @@ -1399,3 +1399,82 @@ def critical(self, *args, trace=True, **kwargs): self.log.critical(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() + + +class HookModule(BaseModule): + accept_dupes = True + suppress_dupes = False + _hook = True + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._first = False + + async def _worker(self): + async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): + try: + while not self.scan.stopping and not self.errored: + + try: + if self.incoming_event_queue is not False: + incoming = await self.get_incoming_event() + try: + event, _kwargs = incoming + except ValueError: + event = incoming + _kwargs = {} + else: + self.debug(f"Event queue is in bad state") + break + except asyncio.queues.QueueEmpty: + await asyncio.sleep(0.1) + continue + + if event.type == "FINISHED": + context = f"{self.name}.finish()" + async with self.scan._acatch(context), self._task_counter.count(context): + finish_task = asyncio.create_task(self.finish()) + await finish_task + continue + + self.debug(f"Got {event} from {getattr(event, 'module', 'unknown_module')}") + async with self._task_counter.count(f"event_postcheck({event})"): + acceptable, reason = await self._event_postcheck(event) + + if acceptable: + context = f"{self.name}.handle_event({event})" + self.scan.stats.event_consumed(event, self) + self.debug(f"Hooking {event}") + async with self.scan._acatch(context), self._task_counter.count(context): + task_name = f"{self.name}.handle_event({event})" + handle_event_task = asyncio.create_task(self.handle_event(event), name=task_name) + await handle_event_task + self.debug(f"Finished hooking {event}") + else: + self.debug(f"Not hooking {event} because 
{reason}") + + await self.outgoing_event_queue.put((event, _kwargs)) + + except asyncio.CancelledError: + self.log.trace("Worker cancelled") + raise + self.log.trace(f"Worker stopped") + + async def get_incoming_event(self): + try: + return self.incoming_event_queue.get_nowait() + except asyncio.queues.QueueEmpty: + if self._first: + return self.scan.manager.get_event_from_modules() + raise + + async def queue_event(self, event, precheck=False): + try: + self.incoming_event_queue.put_nowait(event) + if event.type != "FINISHED": + self.scan.manager._new_activity = True + except AttributeError: + self.debug(f"Not in an acceptable state to queue incoming event") + + async def _event_postcheck(self, event): + return await self._event_postcheck_inner(event) diff --git a/bbot/modules/httpx.py b/bbot/modules/httpx.py index 30cc3fba3c..0f74fbcfc6 100644 --- a/bbot/modules/httpx.py +++ b/bbot/modules/httpx.py @@ -173,8 +173,6 @@ async def handle_batch(self, *events): if url_event: if url_event != source_event: await self.emit_event(url_event) - else: - url_event._resolved.set() # HTTP response await self.emit_event(j, "HTTP_RESPONSE", url_event, tags=url_event.tags) diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py new file mode 100644 index 0000000000..62ba4362a0 --- /dev/null +++ b/bbot/modules/internal/dnsresolve.py @@ -0,0 +1,75 @@ +from bbot.modules.base import HookModule + + +class dnsresolve(HookModule): + hooked_events = ["DNS_NAME"] + _priority = 1 + + async def setup(self): + self.dns_resolution = self.scan.config.get("dns_resolution", False) + return True + + async def handle_event(self, event): + event.add_tag("dnsresolved") + resolved_hosts = set() + dns_children = {} + dns_tags = set() + + # skip DNS resolution if it's disabled in the config and the event is a target and we don't have a blacklist + # this is a performance optimization and it'd be nice if we could do it for all events not just targets + # but for non-target 
events, we need to know what IPs they resolve to so we can make scope decisions about them + skip_dns_resolution = (not self.dns_resolution) and "target" in event.tags and not self.scan.blacklist + if skip_dns_resolution: + dns_tags = {"resolved"} + else: + # DNS resolution + dns_tags, dns_children = await self.helpers.dns.resolve_event(event, minimal=not self.dns_resolution) + + # whitelisting / blacklisting based on resolved hosts + event_whitelisted = False + event_blacklisted = False + for rdtype, children in dns_children.items(): + if event_blacklisted: + break + for host in children: + if rdtype in ("A", "AAAA", "CNAME"): + for ip in ips: + resolved_hosts.add(ip) + # having a CNAME to an in-scope resource doesn't make you in-scope + if not event_whitelisted and rdtype != "CNAME": + with suppress(ValidationError): + if self.parent_helper.scan.whitelisted(host): + event_whitelisted = True + # CNAME to a blacklisted resources, means you're blacklisted + with suppress(ValidationError): + if self.parent_helper.scan.blacklisted(host): + event_blacklisted = True + break + + # kill runaway DNS chains + dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) + if dns_resolve_distance >= self.helpers.dns.max_dns_resolve_distance: + log.debug( + f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.scan.helpers.dns.max_dns_resolve_distance})" + ) + dns_children = {} + + if event.type in ("DNS_NAME", "IP_ADDRESS"): + event._dns_children = dns_children + for tag in dns_tags: + event.add_tag(tag) + + event._resolved_hosts = resolved_hosts + + event_whitelisted = event_whitelisted_dns | self.scan.whitelisted(event) + event_blacklisted = event_blacklisted_dns | self.scan.blacklisted(event) + if event_blacklisted: + event.add_tag("blacklisted") + reason = "event host" + if event_blacklisted_dns: + reason = "DNS associations" + log.debug(f"Omitting due to blacklisted {reason}: {event}") 
+ return + + if event_whitelisted: + event.add_tag("whitelisted") diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 6193c5e8ce..4d6a0e2397 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -49,9 +49,39 @@ def __init__(self, scan): self._task_counter = TaskCounter() self._new_activity = True self._modules_by_priority = None + self._hook_modules = None + self._non_hook_modules = None self._incoming_queues = None self._module_priority_weights = None + async def _worker_loop(self): + try: + while not self.scan.stopped: + try: + async with self._task_counter.count("get_event_from_modules()"): + # if we have hooks set up, we always get events from the last (lowest priority) hook module. + if self.hook_modules: + last_hook_module = self.hook_modules[-1] + incoming = last_hook_module.outgoing_event_queue.get_nowait() + else: + # otherwise, we go through all the modules + incoming = self.get_event_from_modules() + try: + event, kwargs = incoming + except: + log.critical(incoming) + except asyncio.queues.QueueEmpty: + await asyncio.sleep(0.1) + continue + async with self._task_counter.count(f"emit_event({event})"): + emit_event_task = asyncio.create_task( + self.emit_event(event, **kwargs), name=f"emit_event({event})" + ) + await emit_event_task + + except Exception: + log.critical(traceback.format_exc()) + async def init_events(self): """ Initializes events by seeding the scanner with target events and distributing them for further processing. 
@@ -63,9 +93,9 @@ async def init_events(self): context = f"manager.init_events()" async with self.scan._acatch(context), self._task_counter.count(context): - await self.distribute_event(self.scan.root_event) + sorted_events = sorted(self.scan.target.events, key=lambda e: len(e.data)) - for event in sorted_events: + for event in [self.scan.root_event] + sorted_events: event._dummy = False event.scope_distance = 0 event.web_spider_distance = 0 @@ -75,7 +105,11 @@ async def init_events(self): if event.module is None: event.module = self.scan._make_dummy_module(name="TARGET", _type="TARGET") self.scan.verbose(f"Target: {event}") - self.queue_event(event) + if self.hook_modules: + first_hook_module = self.hook_modules[0] + await first_hook_module.queue_event(event) + else: + self.queue_event(event) await asyncio.sleep(0.1) self.scan._finished_init = True @@ -97,21 +131,17 @@ async def emit_event(self, event, *args, **kwargs): # skip event if it fails precheck if event.type != "DNS_NAME": acceptable = self._event_precheck(event) - if not acceptable: - event._resolved.set() - return log.debug(f'Module "{event.module}" raised {event}') if quick: log.debug(f"Quick-emitting {event}") - event._resolved.set() for kwarg in callbacks: kwargs.pop(kwarg, None) async with self.scan._acatch(context=self.distribute_event): await self.distribute_event(event) else: - async with self.scan._acatch(context=self._emit_event, finally_callback=event._resolved.set): + async with self.scan._acatch(context=self._emit_event): await self._emit_event( event, *args, @@ -174,53 +204,7 @@ async def _emit_event(self, event, **kwargs): on_success_callback = kwargs.pop("on_success_callback", None) abort_if = kwargs.pop("abort_if", None) - # skip DNS resolution if it's disabled in the config and the event is a target and we don't have a blacklist - skip_dns_resolution = (not self.dns_resolution) and "target" in event.tags and not self.scan.blacklist - if skip_dns_resolution: - event._resolved.set() - 
dns_children = {} - dns_tags = {"resolved"} - event_whitelisted_dns = True - event_blacklisted_dns = False - resolved_hosts = [] - else: - # DNS resolution - ( - dns_tags, - event_whitelisted_dns, - event_blacklisted_dns, - dns_children, - ) = await self.scan.helpers.dns.resolve_event(event, minimal=not self.dns_resolution) - resolved_hosts = set() - for rdtype, ips in dns_children.items(): - if rdtype in ("A", "AAAA", "CNAME"): - for ip in ips: - resolved_hosts.add(ip) - - # kill runaway DNS chains - dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) - if dns_resolve_distance >= self.scan.helpers.dns.max_dns_resolve_distance: - log.debug( - f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.scan.helpers.dns.max_dns_resolve_distance})" - ) - dns_children = {} - - if event.type in ("DNS_NAME", "IP_ADDRESS"): - event._dns_children = dns_children - for tag in dns_tags: - event.add_tag(tag) - - event._resolved_hosts = resolved_hosts - - event_whitelisted = event_whitelisted_dns | self.scan.whitelisted(event) - event_blacklisted = event_blacklisted_dns | self.scan.blacklisted(event) - if event_blacklisted: - event.add_tag("blacklisted") - reason = "event host" - if event_blacklisted_dns: - reason = "DNS associations" - log.debug(f"Omitting due to blacklisted {reason}: {event}") - return + event_whitelisted = "whitelisted" in event.tags # other blacklist rejections - URL extensions, etc. 
if "blacklisted" in event.tags: @@ -244,7 +228,7 @@ async def _emit_event(self, event, **kwargs): if event.scope_distance <= self.scan.scope_search_distance: if not "unresolved" in event.tags: if not self.scan.helpers.is_ip_type(event.host): - await self.scan.helpers.dns.handle_wildcard_event(event, dns_children) + await self.scan.helpers.dns.handle_wildcard_event(event) # For DNS_NAMEs, we've waited to do this until now, in case event.data changed during handle_wildcard_event() if event.type == "DNS_NAME": @@ -301,8 +285,8 @@ async def _emit_event(self, event, **kwargs): if emit_children: dns_child_events = [] - if dns_children: - for rdtype, records in dns_children.items(): + if event.dns_children: + for rdtype, records in event.dns_children.items(): module = self.scan._make_dummy_module_dns(rdtype) module._priority = 4 for record in records: @@ -329,7 +313,6 @@ async def _emit_event(self, event, **kwargs): log.trace(traceback.format_exc()) finally: - event._resolved.set() log.debug(f"{event.module}.emit_event() finished for {event}") def is_incoming_duplicate(self, event, add=False): @@ -395,30 +378,15 @@ async def distribute_event(self, event): if not is_outgoing_duplicate and -1 < event.scope_distance < 1: self.scan.word_cloud.absorb_event(event) for mod in self.scan.modules.values(): + # don't distribute events to hook modules + if mod._hook: + continue acceptable_dup = (not is_outgoing_duplicate) or mod.accept_dupes # graph_important = mod._type == "output" and event._graph_important == True graph_important = mod._is_graph_important(event) if acceptable_dup or graph_important: await mod.queue_event(event) - async def _worker_loop(self): - try: - while not self.scan.stopped: - try: - async with self._task_counter.count("get_event_from_modules()"): - event, kwargs = self.get_event_from_modules() - except asyncio.queues.QueueEmpty: - await asyncio.sleep(0.1) - continue - async with self._task_counter.count(f"emit_event({event})"): - emit_event_task = 
asyncio.create_task( - self.emit_event(event, **kwargs), name=f"emit_event({event})" - ) - await emit_event_task - - except Exception: - log.critical(traceback.format_exc()) - def kill_module(self, module_name, message=None): from signal import SIGINT @@ -438,7 +406,7 @@ def modules_by_priority(self): @property def incoming_queues(self): if not self._incoming_queues: - queues_by_priority = [m.outgoing_event_queue for m in self.modules_by_priority] + queues_by_priority = [m.outgoing_event_queue for m in self.modules_by_priority if not m._hook] self._incoming_queues = [self.incoming_event_queue] + queues_by_priority return self._incoming_queues @@ -453,10 +421,24 @@ def incoming_qsize(self): def module_priority_weights(self): if not self._module_priority_weights: # we subtract from six because lower priorities == higher weights - priorities = [5] + [6 - m.priority for m in self.modules_by_priority] + priorities = [5] + [6 - m.priority for m in self.modules_by_priority if not m._hook] self._module_priority_weights = priorities return self._module_priority_weights + @property + def hook_modules(self): + if self._hook_modules is None: + self._hook_modules = [m for m in self.modules_by_priority if m._hook] + if self._hook_modules: + self._hook_modules[0]._first = True + return self._hook_modules + + @property + def non_hook_modules(self): + if self._non_hook_modules is None: + self._non_hook_modules = [m for m in self.modules_by_priority if not m._hook] + return self._non_hook_modules + def get_event_from_modules(self): for q in self.scan.helpers.weighted_shuffle(self.incoming_queues, self.module_priority_weights): try: @@ -485,8 +467,6 @@ def queue_event(self, event, **kwargs): event_in_scope = self.scan.whitelisted(event) and not self.scan.blacklisted(event) if not event_in_scope: event.module_priority += event.scope_distance - # Wait for parent event to resolve (in case its scope distance changes) - # await resolved = event.source._resolved.wait() # update event's 
scope distance based on its parent event.scope_distance = event.source.scope_distance + 1 self.incoming_event_queue.put_nowait((event, kwargs)) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index c7aaf8f447..e2b813cbc5 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -316,13 +316,13 @@ async def async_start(self): asyncio.create_task(self.manager._worker_loop()) for _ in range(self.max_workers) ] - # distribute seed events - self.init_events_task = asyncio.create_task(self.manager.init_events()) - self.status = "RUNNING" self._start_modules() self.verbose(f"{len(self.modules):,} modules started") + # distribute seed events + self.init_events_task = asyncio.create_task(self.manager.init_events()) + # main scan loop while 1: # abort if we're aborting @@ -330,12 +330,13 @@ async def async_start(self): self._drain_queues() break + # yield events as they come (async for event in scan.async_start()) if "python" in self.modules: events, finish = await self.modules["python"]._events_waiting(batch_size=-1) for e in events: yield e - # if initialization finished and the scan is no longer active + # break if initialization finished and the scan is no longer active if self._finished_init and not self.manager.active: new_activity = await self.finish() if not new_activity: @@ -387,7 +388,17 @@ async def async_start(self): def _start_modules(self): self.verbose(f"Starting module worker loops") - for module_name, module in self.modules.items(): + + # hook modules get sewn together like human centipede + if len(self.manager.hook_modules) > 1: + for i, hook_module in enumerate(self.manager.hook_modules[:-1]): + next_hook_module = self.manager.hook_modules[i + 1] + self.debug( + f"Setting hook module {hook_module.name}.outgoing_event_queue to next hook module {next_hook_module.name}.incoming_event_queue" + ) + hook_module._outgoing_event_queue = next_hook_module.incoming_event_queue + + for module in self.modules.values(): module.start() async 
def setup_modules(self, remove_failed=True): @@ -552,8 +563,8 @@ async def finish(self): self.status = "FINISHING" # Trigger .finished() on every module and start over log.info("Finishing scan") - finished_event = self.make_event("FINISHED", "FINISHED", dummy=True) for module in self.modules.values(): + finished_event = self.make_event(f"FINISHED", "FINISHED", dummy=True, tags={module.name}) await module.queue_event(finished_event) self.verbose("Completed finish()") return True @@ -767,7 +778,6 @@ def root_event(self): root_event = self.make_event(data=f"{self.name} ({self.id})", event_type="SCAN", dummy=True) root_event._id = self.id root_event.scope_distance = 0 - root_event._resolved.set() root_event.source = root_event root_event.module = self._make_dummy_module(name="TARGET", _type="TARGET") return root_event @@ -993,7 +1003,7 @@ async def _status_ticker(self, interval=15): self.manager.modules_status(_log=True) @contextlib.asynccontextmanager - async def _acatch(self, context="scan", finally_callback=None): + async def _acatch(self, context="scan", finally_callback=None, unhandled_is_critical=False): """ Async version of catch() @@ -1003,9 +1013,9 @@ async def _acatch(self, context="scan", finally_callback=None): try: yield except BaseException as e: - self._handle_exception(e, context=context) + self._handle_exception(e, context=context, unhandled_is_critical=unhandled_is_critical) - def _handle_exception(self, e, context="scan", finally_callback=None): + def _handle_exception(self, e, context="scan", finally_callback=None, unhandled_is_critical=False): if callable(context): context = f"{context.__qualname__}()" filename, lineno, funcname = self.helpers.get_traceback_details(e) @@ -1018,8 +1028,12 @@ def _handle_exception(self, e, context="scan", finally_callback=None): elif isinstance(e, asyncio.CancelledError): raise elif isinstance(e, Exception): - log.error(f"Error in {context}: {filename}:{lineno}:{funcname}(): {e}") - log.trace(traceback.format_exc()) 
+ if unhandled_is_critical: + log.critical(f"Error in {context}: {filename}:{lineno}:{funcname}(): {e}") + log.critical(traceback.format_exc()) + else: + log.error(f"Error in {context}: {filename}:{lineno}:{funcname}(): {e}") + log.trace(traceback.format_exc()) if callable(finally_callback): finally_callback(e) diff --git a/poetry.lock b/poetry.lock index 94601cf202..7be6743c12 100644 --- a/poetry.lock +++ b/poetry.lock @@ -695,6 +695,32 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + [[package]] name = "httpcore" version = "1.0.5" @@ -740,6 +766,17 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = 
"sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + [[package]] name = "identify" version = "2.5.35" @@ -827,6 +864,17 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "kiss-headers" +version = "2.4.3" +description = "Object-oriented HTTP and IMAP (structured) headers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "kiss_headers-2.4.3-py3-none-any.whl", hash = "sha256:9d800b77532068e8748be9f96f30eaeb547cdc5345e4689ddf07b77071256239"}, + {file = "kiss_headers-2.4.3.tar.gz", hash = "sha256:70c689ce167ac83146f094ea916b40a3767d67c2e05a4cb95b0fd2e33bf243f1"}, +] + [[package]] name = "libsass" version = "0.23.0" @@ -1304,6 +1352,30 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "niquests" +version = "3.5.5" +description = "Niquests is a simple, yet elegant, HTTP library. It is a drop-in replacement for Requests, which is under feature freeze." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "niquests-3.5.5-py3-none-any.whl", hash = "sha256:bd134c7cbc414661840e73bebe0b766c16321558b3c444efb3f63aad9189e308"}, + {file = "niquests-3.5.5.tar.gz", hash = "sha256:5b52183cd4ee16f360de1e5b97bc266b933e8603320102d10d17f68a95e926ba"}, +] + +[package.dependencies] +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +kiss-headers = ">=2,<4" +urllib3-future = ">=2.7.900,<3" +wassima = ">=1.0.1,<2" + +[package.extras] +http3 = ["urllib3-future[qh3]"] +ocsp = ["cryptography (>=41.0.0,<43.0.0)"] +socks = ["urllib3-future[socks]"] +speedups = ["orjson (>=3,<4)", "urllib3-future[brotli,zstd]"] + [[package]] name = "nodeenv" version = "1.8.0" @@ -2083,6 +2155,47 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "qh3" +version = "0.15.1" +description = "An implementation of QUIC and HTTP/3" +optional = false +python-versions = ">=3.7" +files = [ + {file = "qh3-0.15.1-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:fe8f15e9fe5850508188ce38bdc89bda03d1a99ce3c2fbde6ee02d1d91edc557"}, + {file = "qh3-0.15.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:114d04dd51d3d9eca76ce804fea60ccb0fcbe84be08dcca70f32e30e5736aa00"}, + {file = "qh3-0.15.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:265240539a630cf458f3651f08bd07e4d46b2bf941a25e7f594321401701b30d"}, + {file = "qh3-0.15.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1074ee0e30fe825b60bd113767b56dcfe2f155e79f893d5180d4fd2adebaa1de"}, + {file = "qh3-0.15.1-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0afd9e7b90c90ff3e8c8e376020e3753936da0ce8db57ebb9fc95a50ba7e015d"}, + {file = "qh3-0.15.1-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:b7c4df89b03f90f67e372693c70f357dabc18908cb07dab21aa550c4f777017b"}, + {file = "qh3-0.15.1-cp37-abi3-musllinux_1_1_i686.whl", hash = 
"sha256:3923bb17dbdf91f060cb3b04cb8c2e3bf74d528a26f4c0e5365e311bade33b58"}, + {file = "qh3-0.15.1-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:87b61b59e3c692b70384430ccf634a228c54bb38ee6d974d76a7b086b356ecad"}, + {file = "qh3-0.15.1-cp37-abi3-win32.whl", hash = "sha256:3d02314850b0c8a5cd39015b9f5e5b21d54980702e3e80dcfc6aa7b983d7494a"}, + {file = "qh3-0.15.1-cp37-abi3-win_amd64.whl", hash = "sha256:1a0305b389cec13af879dee32c6584cff45a52865456e6645d84023ed8442d67"}, + {file = "qh3-0.15.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:8c000a32d2d3dadf252a55d71f676011f02c0e529024176d35e53122293d8a54"}, + {file = "qh3-0.15.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9562c2648a0e468cc3c97e77c658c0b9db288e29cfc79d52220e50ddcfac9fe9"}, + {file = "qh3-0.15.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71ab5d62606556c0ba2b1f3bf118dcb2d6f0236add792ffba42845a741abe498"}, + {file = "qh3-0.15.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:592bd246325090ffe8324761808713b1c99c7b7cae37ec4bd2841d0054729422"}, + {file = "qh3-0.15.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f68ac19161aaef887351f2e8df1972d91726ade69105b4ae1653ab0e70a18536"}, + {file = "qh3-0.15.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a842e65e57f8092f1fa185b1dc95556b1b695f06a4eb48dc9c07f018bd7a7ec"}, + {file = "qh3-0.15.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bca27698ad110fabda026f844f453b1ac1a1e2d86729846f5be0cdc9e7df419"}, + {file = "qh3-0.15.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c87a0613efbc3d353a76a917044270caf43198890ffe702b3cbe9b44065c45e"}, + {file = "qh3-0.15.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:77673a9b02e19c4f81e419efa2aa4040dec10f0a6158788196d8b5ef6aafb0d9"}, + {file = "qh3-0.15.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:087da39ebd5a8608e8df0892860b4fdcd4ff83753d7312cead490de6f1bce504"}, + {file = "qh3-0.15.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:561ba4d84e617ecc0d7506f532da2814e672a06cdcb903209616f00c5da74c14"}, + {file = "qh3-0.15.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec733c6a4da5ecf4448434562aba617ecfabbdef0a58df812684db7d03000070"}, + {file = "qh3-0.15.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74cc03a94e605820f3c5882e47388e8d2d8616d51db57a6e5120d9f2344dc04a"}, + {file = "qh3-0.15.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:939253ceaf5664c4e90f6317f0097839b6c8af627bb5905181f4fcbbc209c395"}, + {file = "qh3-0.15.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:969582d286be3e468ff5e53cdf2a5f47a942ea370f870a0276c4235a7ed13a71"}, + {file = "qh3-0.15.1.tar.gz", hash = "sha256:816c787f68855a28aa703be54956b21ff258e1650978a06b98a23bbf252cbe7e"}, +] + +[package.dependencies] +cryptography = ">=41.0.0,<43" + +[package.extras] +dev = ["coverage[toml] (>=7.2.2)"] + [[package]] name = "regex" version = "2023.12.25" @@ -2401,6 +2514,28 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "urllib3-future" +version = "2.7.903" +description = "urllib3.future is a powerful HTTP 1.1, 2, and 3 client with both sync and async interfaces" +optional = false +python-versions = ">=3.7" +files = [ + {file = "urllib3_future-2.7.903-py3-none-any.whl", hash = "sha256:04bebce1291c9be9db2b03bb016db56d1f7e3dbe425c7250129552a8ceaf6827"}, + {file = "urllib3_future-2.7.903.tar.gz", hash = "sha256:99e1265c8bb2478d86b8a6c0de991ac275ad58d5e43ac11d980a0dd1cc183804"}, +] + +[package.dependencies] +h11 = ">=0.11.0,<1.0.0" +h2 = 
">=4.0.0,<5.0.0" +qh3 = {version = ">=0.14.0,<1.0.0", markers = "(platform_system == \"Darwin\" or platform_system == \"Windows\" or platform_system == \"Linux\") and (platform_python_implementation == \"CPython\" or (platform_python_implementation == \"PyPy\" and python_version >= \"3.8\" and python_version < \"3.11\"))"} + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +qh3 = ["qh3 (>=0.14.0,<1.0.0)"] +socks = ["python-socks (>=2.0,<3.0)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "virtualenv" version = "20.25.1" @@ -2421,6 +2556,82 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "wassima" +version = "1.1.0" +description = "Access your OS root certificates with the atmost ease" +optional = false +python-versions = ">=3.7" +files = [ + {file = "wassima-1.1.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6b67781f7b9483a5dbcb1cabe588ab316f06f7c97a9d60b6981681f790aa16a1"}, + {file = "wassima-1.1.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:fb331ab3ff4222ced413a9830c1e9e6a834e7257bfee0043d2f56166ef4aa1cb"}, + {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8904e865a2ac81d8160878e7788bc5ee6f4ca6948cf5c5198a83050d68537024"}, + {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c414ee94cd1986d7ea3700a6d80efc9ae9b194c37d77396bcfaf927b0d5a620e"}, + {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:7d5d2d1f4f35808a58c8fe7777db14526bd53f77a34b373f070912b2c23f2c3b"}, + {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ee1b84222c65f0e2b8ecb6362cc721df1953a0a59e13efc7a4055592fd897f8"}, + {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56450dee854ce494003f2be92f2eddb2531c02a456a7866dd32af467672c3b7b"}, + {file = "wassima-1.1.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28897780714f49331fd3e76531ea248df637bbf3e7bf4be175381a92d596c460"}, + {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7528cbfe710af7f9e92cd52296efd7a788466b7cc7fe575b196f604a6ba2281c"}, + {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_armv7l.whl", hash = "sha256:4c3325dff14e796d346e81f90067d054714b99a3d86b6d0a5a76d85bafd2b654"}, + {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_i686.whl", hash = "sha256:e6609ca3d620792c1dc137efff4c189adee0f13f266ae14515d7de2952159b95"}, + {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:60a695e750f9b4fc3cc79cbbbb5e507b8f9715e4135906bb1822126fad1ce5a2"}, + {file = "wassima-1.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:fca891820f7c679d3adc2443d6f85d6201db4badc6b17927d70fa498168d1aea"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:66efd9ee997bfb2311ade7a09f3174d6450a8695ab6b323840539c5826a276c6"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:a8550eb00a31eac76a5b5fab3ca2e87cc8d91781191dffa3e133ebf574305321"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4df677518d7779fc8a522132c4d96391e0a262dd12bb54ec3937bc8b58f6d3d5"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:29d4f6d006ce96c2087c42414ad72ef71bc25bd20ac457dfe89ab2448b0d08e4"}, + {file = 
"wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e73264427c6e3f93c7e1b0529960a342a6b4c9c16d17785872a845ee2b0d28f5"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fdbc87751649a23377f279873aae902a38ce44162170edd6b6893d47a259a78"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce49ac61ca651f49c2664003215e259a017d5a1116d669ef331c4930214f53b0"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9522b4905fc75eeaac8518c54362e87d89e83bbefebdb1898a0ef025006e8241"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1b2e419d3075e425ecdcefd486ccd56697dc209e6e2120746477a995392b9402"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1ebe5b0feead8b0457b885f181156574bf9ca88df6fe4cef6ad6b364f02d9e98"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_i686.whl", hash = "sha256:6947c5e2d23383f00199b2cf638d7a090dfe5949bad113387e020b83f2423815"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4420be43f5b4e2b7721080130de565a582299d0d02771c9a7db55366d9c93da5"}, + {file = "wassima-1.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a9fb48413d80d41aa6531a2271516f63c8a1debac016cf8fad6a2fd30aa4486d"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9a4593db51fc02529950158f1217e08c9d62e1299e20a19858f07f80c6d09197"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-macosx_11_0_arm64.whl", hash = "sha256:127aecd895501e79b76114109dba0e4bcf6adcf47169f75d44ecd08b4d983ae7"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7d36eb4e92a348f58182f7f69b0e2fc680ac6605377f5201bac40b303727493"}, + {file = 
"wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0e29ace26e79b923d5b0f04c38dff44dc47b9c48684894d8f20841c6ee79760"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ab77a0390ba74b7a011918ae5c187e2936cd46f4abffd37c5ff228dbdc4b5e89"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b18821d94eabef23946e21566e7ae7c009ef3a89fe1bc0204e791ba5fdb8ed5"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ef95ad25c04629939d6a6015a798c8b0435cebc0c53cc4b1dabb2a89214a4d8"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ebef47ce05288db306d4f56937f96c48da07afaec014a6ed46ecb17176f874bf"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f0833555f8a334cf1c824f24b07c6b01b13128825d16f7802c4c70d14d2dbe09"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:31afbbe4ea11ea9f92b152e4a61495614bfc0ae3d7c3215a24928144bab79f99"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_i686.whl", hash = "sha256:a759b84855b70922ee31234264ea2f4a058943a38270a18f00fd597f365b4bcb"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa01044ab3ca1f55e2d0d08128a97a68e9022795587627ee9edb3471c72e5df4"}, + {file = "wassima-1.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:08d6cf46441d73335b84c15c4f891adcb59f70701a13ecdee82aead5e0a9b134"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:23b56e0560bd2f35fceab001099bb890d8238fed64e7a0677cacbd1c4d870183"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac1866ee965263e3e024049044e8a5ce905fea2d40e005be03dcd89265fc1e6c"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4285c88f5cb4322318e3c3666d79b923f5317451efc2701011d960774d812675"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1f643f02b0824e789a7c98b9089dfd772a74ceec1a611cf420799f986cadb6bc"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9a7b91256ca429f99beff72dd89b0d5bd6ee1ca8f047138785c5b943eebfb1"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4224cf40a81840618a22164d4002fe5bb9b83cde957ec16e8913996809e705dd"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1006c7510b8495559fc2f1f27a7e49205140eb6b91a91f2c71cd91c2588522ae"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5abfa548d3c7acbf87899fc4af99c5a1fe929cf8cc7a7fd65a825dd88fa37b10"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c5854745eb0fd9243ebbaad46dc1f6f5193dd3f13f12dd19da95877ee2a8d62c"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:9def0580135d80a64aac4761e008d0d82fad5feb9c5028ba9427393144e4a535"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_i686.whl", hash = "sha256:450501472645fe5ea65f1848466ce5a0f2800ed5e13288fa4c210728e2883d24"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9764c226493e4a9b960156c3657ef7cece430ab8bad0035ebffb0eeb488633cb"}, + {file = "wassima-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:83792a234431f7fbd06f3370e968b99df430ab3bacdb9ea3318247d55dee3b6c"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:aee6fcfa43ce63691ec30943681e9432ff6cecbd976526c7ec0e5f2aaf85866a"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:31e69da1f3cf1ce4f24dbc4590101d68fcb3e1f715566fe30b6691429e9c1b10"}, + {file = 
"wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b574a646498b191bc8974524458d85bc55335992dc8ea7cddcb09ec58c01d4"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3112316434fd3ed3cfb1eac4998f54ed46d07a36172d18d543c0815a98e0d51"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:18e6f92114f878ea26fea7a10af255a6aadfddb1600f20fdfe96d65598e62501"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09d54c87ce23ec2332f2acefc030ae3f4262b94cb1f0c613c8d2e30c297d12d7"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9d953b261b7b64072fbed7b4bf4441f7910d8247387f29cc82f8c314f7acf39"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3c8ef5a3d129997147f5475c276bc79da14ac59a8f614f07634e2aac5d9b2f94"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e9a6da09d6a03c0c8ec3f5c6b7fa5061f051d67a0e0f0ec1518d2bd76efb7535"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:39d65b2beb0eb17a92cdf859d8e9146a15f8d7f35ab95602780a3ac078069e7e"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_i686.whl", hash = "sha256:e5ed0411e3a14e9352ff83e47952df03b7c8915f9fd4c9fb0888a80ac2759dcf"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:8f5869858975755d90e5505d3a7e2ac687cd09a348bc48137fd5b270969bd7a0"}, + {file = "wassima-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:39742c4884b7d1b3314064895e10345b96c7cab0a8f622e65f7beea89c0de4d2"}, + {file = "wassima-1.1.0-py3-none-any.whl", hash = "sha256:d250b77c1964c03f010a271fdd0cad3e14af250fb15cc3a729f23ee1e5922f69"}, + {file = "wassima-1.1.0.tar.gz", hash = 
"sha256:0ae03025ec07c0491e2d1a499d404eb66180c226f403451042190294f6ec7f06"}, +] + [[package]] name = "watchdog" version = "4.0.0" @@ -2613,4 +2824,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "1532c2dc5846395a46766fead9f3c29223369ba11025b04e4eebec508fe0d8da" +content-hash = "38517d808d6bc20a9e2c8597b4024707537f2a92d1f75c67a5ed3477c139418b" diff --git a/pyproject.toml b/pyproject.toml index 53dfe2131c..cb8766c381 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ cachetools = "^5.3.2" socksio = "^1.0.0" jinja2 = "^3.1.3" pyzmq = "^25.1.2" +niquests = "^3.5.5" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 84074c1e9cb99ed4a7dea69dcce6617f0126553d Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 8 Apr 2024 17:45:49 -0400 Subject: [PATCH 19/63] WIP hook modules --- bbot/core/event/base.py | 6 +- bbot/core/helpers/dns/dns.py | 9 +-- bbot/core/helpers/dns/engine.py | 15 ---- bbot/modules/base.py | 35 +++++++--- bbot/modules/internal/dnsresolve.py | 103 ++++++++++++++++++++++------ bbot/scanner/manager.py | 95 ++++--------------------- bbot/scanner/scanner.py | 3 - 7 files changed, 128 insertions(+), 138 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index bf030ebc28..345e64115b 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1,6 +1,5 @@ import re import json -import asyncio import logging import ipaddress import traceback @@ -604,7 +603,7 @@ def json(self, mode="json", siem_friendly=False): j["scan"] = self.scan.id j["timestamp"] = self.timestamp.timestamp() if self.host: - j["resolved_hosts"] = [str(h) for h in self.resolved_hosts] + j["resolved_hosts"] = sorted(str(h) for h in self.resolved_hosts) source_id = self.source_id if source_id: j["source"] = source_id @@ -951,7 +950,8 @@ def sanitize_data(self, data): @property def resolved_hosts(self): - return 
[".".join(i.split("-")[1:]) for i in self.tags if i.startswith("ip-")] + # TODO: remove this when we rip out httpx + return set(".".join(i.split("-")[1:]) for i in self.tags if i.startswith("ip-")) @property def pretty_string(self): diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index f9c505540b..3988420482 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -3,9 +3,7 @@ import dns.exception import dns.asyncresolver from cachetools import LRUCache -from contextlib import suppress -from bbot.errors import ValidationError from bbot.core.engine import EngineClient from bbot.core.helpers.async_helpers import NamedLock from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host @@ -96,7 +94,7 @@ async def resolve_event(self, event, minimal=False): # abort if the event doesn't have a host if (not event.host) or (event.type in ("IP_RANGE",)): # tags, whitelisted, blacklisted, children - return set(), False, False, dict() + return set(), dict() event_host = str(event.host) event_type = str(event.type) @@ -104,7 +102,7 @@ async def resolve_event(self, event, minimal=False): dns_children = dict() if (not event.host) or (event.type in ("IP_RANGE",)): - return event_tags, event_whitelisted, event_blacklisted, dns_children + return event_tags, dns_children # lock to ensure resolution of the same host doesn't start while we're working here async with self._event_cache_locks.lock(event_host): @@ -112,9 +110,6 @@ async def resolve_event(self, event, minimal=False): try: _event_tags, _dns_children = self._event_cache[event_host] event_tags.update(_event_tags) - # if we found it, return it - if _event_whitelisted is not None: - return event_tags, _dns_children except KeyError: pass diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 2b5903292b..abba661a81 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -23,7 +23,6 @@ host_in_host, 
make_ip_type, smart_decode, - cloudcheck, rand_string, ) @@ -470,20 +469,6 @@ async def resolve_event(self, event_host, event_type, minimal=False): elif errors: event_tags.add(f"{rdtype.lower()}-error") - # tag with cloud providers - if not self.in_tests: - to_check = set() - if event_type == "IP_ADDRESS": - to_check.add(event_host) - for rdtype, ips in dns_children.items(): - if rdtype in ("A", "AAAA"): - for ip in ips: - to_check.add(ip) - for ip in to_check: - provider, provider_type, subnet = cloudcheck(ip) - if provider: - event_tags.add(f"{provider_type}-{provider}") - # if needed, mark as unresolved if not is_ip(event_host) and "resolved" not in event_tags: event_tags.add("unresolved") diff --git a/bbot/modules/base.py b/bbot/modules/base.py index a502037597..89db38fd40 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -683,6 +683,7 @@ def _event_precheck(self, event): if "target" not in event.tags: return False, "it did not meet target_only filter criteria" # exclude certain URLs (e.g. 
javascript): + # TODO: revisit this after httpx rework if event.type.startswith("URL") and self.name != "httpx" and "httpx-only" in event.tags: return False, "its extension was listed in url_extension_httpx_only" @@ -1433,27 +1434,43 @@ async def _worker(self): if event.type == "FINISHED": context = f"{self.name}.finish()" async with self.scan._acatch(context), self._task_counter.count(context): - finish_task = asyncio.create_task(self.finish()) - await finish_task + await self.finish() continue self.debug(f"Got {event} from {getattr(event, 'module', 'unknown_module')}") + + acceptable = True + async with self._task_counter.count(f"event_precheck({event})"): + precheck_pass, reason = self._event_precheck(event) + if not precheck_pass: + self.debug(f"Not hooking {event} because precheck failed ({reason})") + acceptable = False async with self._task_counter.count(f"event_postcheck({event})"): - acceptable, reason = await self._event_postcheck(event) + postcheck_pass, reason = await self._event_postcheck(event) + if not postcheck_pass: + self.debug(f"Not hooking {event} because postcheck failed ({reason})") + acceptable = False + + # whether to pass the event on to the rest of the scan + # defaults to true, unless handle_event returns False + pass_on_event = True + pass_on_event_reason = "" if acceptable: context = f"{self.name}.handle_event({event})" self.scan.stats.event_consumed(event, self) self.debug(f"Hooking {event}") async with self.scan._acatch(context), self._task_counter.count(context): - task_name = f"{self.name}.handle_event({event})" - handle_event_task = asyncio.create_task(self.handle_event(event), name=task_name) - await handle_event_task + pass_on_event = await self.handle_event(event) + with suppress(ValueError, TypeError): + pass_on_event, pass_on_event_reason = pass_on_event + self.debug(f"Finished hooking {event}") - else: - self.debug(f"Not hooking {event} because {reason}") - await self.outgoing_event_queue.put((event, _kwargs)) + if 
pass_on_event is False: + self.debug(f"Not passing on {event} because {pass_on_event_reason}") + else: + await self.outgoing_event_queue.put((event, _kwargs)) except asyncio.CancelledError: self.log.trace("Worker cancelled") diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index 62ba4362a0..13a0e9f76a 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -1,26 +1,34 @@ +from contextlib import suppress + +from bbot.errors import ValidationError from bbot.modules.base import HookModule class dnsresolve(HookModule): - hooked_events = ["DNS_NAME"] + watched_events = ["*"] _priority = 1 async def setup(self): self.dns_resolution = self.scan.config.get("dns_resolution", False) + self.scope_search_distance = max(0, int(self.config.get("scope_search_distance", 0))) + self.scope_dns_search_distance = max(0, int(self.config.get("scope_dns_search_distance", 1))) + self.scope_distance_modifier = max(self.scope_search_distance, self.scope_dns_search_distance) return True - async def handle_event(self, event): - event.add_tag("dnsresolved") - resolved_hosts = set() - dns_children = {} - dns_tags = set() + async def filter_event(self, event): + if not event.host: + return False, "event does not have host attribute" + return True + async def handle_event(self, event): + self.hugesuccess(event) # skip DNS resolution if it's disabled in the config and the event is a target and we don't have a blacklist # this is a performance optimization and it'd be nice if we could do it for all events not just targets # but for non-target events, we need to know what IPs they resolve to so we can make scope decisions about them skip_dns_resolution = (not self.dns_resolution) and "target" in event.tags and not self.scan.blacklist if skip_dns_resolution: dns_tags = {"resolved"} + dns_children = dict() else: # DNS resolution dns_tags, dns_children = await self.helpers.dns.resolve_event(event, minimal=not 
self.dns_resolution) @@ -29,47 +37,102 @@ async def handle_event(self, event): event_whitelisted = False event_blacklisted = False for rdtype, children in dns_children.items(): + self.hugeinfo(f"{event.host}: {rdtype}:{children}") if event_blacklisted: break for host in children: if rdtype in ("A", "AAAA", "CNAME"): - for ip in ips: - resolved_hosts.add(ip) + event.resolved_hosts.add(host) # having a CNAME to an in-scope resource doesn't make you in-scope if not event_whitelisted and rdtype != "CNAME": with suppress(ValidationError): - if self.parent_helper.scan.whitelisted(host): + if self.scan.whitelisted(host): event_whitelisted = True # CNAME to a blacklisted resources, means you're blacklisted with suppress(ValidationError): - if self.parent_helper.scan.blacklisted(host): + if self.scan.blacklisted(host): event_blacklisted = True break # kill runaway DNS chains dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) if dns_resolve_distance >= self.helpers.dns.max_dns_resolve_distance: - log.debug( - f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.scan.helpers.dns.max_dns_resolve_distance})" + self.debug( + f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.helpers.dns.max_dns_resolve_distance})" ) dns_children = {} if event.type in ("DNS_NAME", "IP_ADDRESS"): - event._dns_children = dns_children + event.dns_children = dns_children for tag in dns_tags: event.add_tag(tag) - event._resolved_hosts = resolved_hosts - - event_whitelisted = event_whitelisted_dns | self.scan.whitelisted(event) - event_blacklisted = event_blacklisted_dns | self.scan.blacklisted(event) if event_blacklisted: event.add_tag("blacklisted") reason = "event host" - if event_blacklisted_dns: + if event_blacklisted: reason = "DNS associations" - log.debug(f"Omitting due to blacklisted {reason}: {event}") + 
self.debug(f"Omitting due to blacklisted {reason}: {event}") return if event_whitelisted: - event.add_tag("whitelisted") + self.debug(f"Making {event} in-scope because it resolves to an in-scope resource") + event.scope_distance = 0 + + # DNS_NAME --> DNS_NAME_UNRESOLVED + if event.type == "DNS_NAME" and "unresolved" in event.tags and not "target" in event.tags: + event.type = "DNS_NAME_UNRESOLVED" + + # check for wildcards + if event.scope_distance <= self.scan.scope_search_distance: + if not "unresolved" in event.tags: + if not self.helpers.is_ip_type(event.host): + await self.helpers.dns.handle_wildcard_event(event) + + # speculate DNS_NAMES and IP_ADDRESSes from other event types + source_event = event + if ( + event.host + and event.type not in ("DNS_NAME", "DNS_NAME_UNRESOLVED", "IP_ADDRESS", "IP_RANGE") + and not (event.type in ("OPEN_TCP_PORT", "URL_UNVERIFIED") and str(event.module) == "speculate") + ): + source_event = self.make_event(event.host, "DNS_NAME", source=event) + # only emit the event if it's not already in the parent chain + if source_event is not None and source_event not in event.get_sources(): + source_event.scope_distance = event.scope_distance + if "target" in event.tags: + source_event.add_tag("target") + await self.emit_event(source_event) + + ### Emit DNS children ### + if self.dns_resolution: + self.hugesuccess(f"emitting children for {event}! 
(dns children: {event.dns_children})") + emit_children = True + in_dns_scope = -1 < event.scope_distance < self.scope_distance_modifier + self.critical(f"{event.host} in dns scope: {in_dns_scope}") + + if emit_children: + dns_child_events = [] + if event.dns_children: + for rdtype, records in event.dns_children.items(): + self.hugewarning(f"{event.host}: {rdtype}:{records}") + module = self.scan._make_dummy_module_dns(rdtype) + module._priority = 4 + for record in records: + try: + child_event = self.scan.make_event( + record, "DNS_NAME", module=module, source=source_event + ) + # if it's a hostname and it's only one hop away, mark it as affiliate + if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: + child_event.add_tag("affiliate") + host_hash = hash(str(child_event.host)) + if in_dns_scope or self.preset.in_scope(child_event): + dns_child_events.append(child_event) + except ValidationError as e: + self.warning( + f'Event validation failed for DNS child of {source_event}: "{record}" ({rdtype}): {e}' + ) + for child_event in dns_child_events: + self.debug(f"Queueing DNS child for {event}: {child_event}") + await self.emit_event(child_event) diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 4d6a0e2397..df56361cc9 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -62,14 +62,10 @@ async def _worker_loop(self): # if we have hooks set up, we always get events from the last (lowest priority) hook module. 
if self.hook_modules: last_hook_module = self.hook_modules[-1] - incoming = last_hook_module.outgoing_event_queue.get_nowait() + event, kwargs = last_hook_module.outgoing_event_queue.get_nowait() else: # otherwise, we go through all the modules - incoming = self.get_event_from_modules() - try: - event, kwargs = incoming - except: - log.critical(incoming) + event, kwargs = self.get_event_from_modules() except asyncio.queues.QueueEmpty: await asyncio.sleep(0.1) continue @@ -131,6 +127,8 @@ async def emit_event(self, event, *args, **kwargs): # skip event if it fails precheck if event.type != "DNS_NAME": acceptable = self._event_precheck(event) + if not acceptable: + return log.debug(f'Module "{event.module}" raised {event}') @@ -204,38 +202,25 @@ async def _emit_event(self, event, **kwargs): on_success_callback = kwargs.pop("on_success_callback", None) abort_if = kwargs.pop("abort_if", None) - event_whitelisted = "whitelisted" in event.tags - - # other blacklist rejections - URL extensions, etc. 
- if "blacklisted" in event.tags: + # blacklist rejections + event_blacklisted = self.scan.blacklisted(event) + if event_blacklisted or "blacklisted" in event.tags: log.debug(f"Omitting blacklisted event: {event}") return - # DNS_NAME --> DNS_NAME_UNRESOLVED - if event.type == "DNS_NAME" and "unresolved" in event.tags and not "target" in event.tags: - event.type = "DNS_NAME_UNRESOLVED" - - # Cloud tagging - await self.scan.helpers.cloud.tag_event(event) - - # Scope shepherding - # here is where we make sure in-scope events are set to their proper scope distance - if event.host and event_whitelisted: - log.debug(f"Making {event} in-scope") - event.scope_distance = 0 - - # check for wildcards - if event.scope_distance <= self.scan.scope_search_distance: - if not "unresolved" in event.tags: - if not self.scan.helpers.is_ip_type(event.host): - await self.scan.helpers.dns.handle_wildcard_event(event) - # For DNS_NAMEs, we've waited to do this until now, in case event.data changed during handle_wildcard_event() if event.type == "DNS_NAME": acceptable = self._event_precheck(event) if not acceptable: return + # Scope shepherding + # here is where we make sure in-scope events are set to their proper scope distance + event_whitelisted = self.scan.whitelisted(event) + if event.host and event_whitelisted: + log.debug(f"Making {event} in-scope because it matches the scan target") + event.scope_distance = 0 + # now that the event is properly tagged, we can finally make decisions about it abort_result = False if callable(abort_if): @@ -256,58 +241,6 @@ async def _emit_event(self, event, **kwargs): await self.distribute_event(event) - # speculate DNS_NAMES and IP_ADDRESSes from other event types - source_event = event - if ( - event.host - and event.type not in ("DNS_NAME", "DNS_NAME_UNRESOLVED", "IP_ADDRESS", "IP_RANGE") - and not (event.type in ("OPEN_TCP_PORT", "URL_UNVERIFIED") and str(event.module) == "speculate") - ): - source_module = self.scan._make_dummy_module("host", 
_type="internal") - source_module._priority = 4 - source_event = self.scan.make_event(event.host, "DNS_NAME", module=source_module, source=event) - # only emit the event if it's not already in the parent chain - if source_event is not None and source_event not in source_event.get_sources(): - source_event.scope_distance = event.scope_distance - if "target" in event.tags: - source_event.add_tag("target") - self.queue_event(source_event) - - ### Emit DNS children ### - if self.dns_resolution: - emit_children = True - in_dns_scope = -1 < event.scope_distance < self.scan.scope_dns_search_distance - # only emit DNS children once for each unique host - host_hash = hash(str(event.host)) - if host_hash in self.outgoing_dup_tracker: - emit_children = False - self.outgoing_dup_tracker.add(host_hash) - - if emit_children: - dns_child_events = [] - if event.dns_children: - for rdtype, records in event.dns_children.items(): - module = self.scan._make_dummy_module_dns(rdtype) - module._priority = 4 - for record in records: - try: - child_event = self.scan.make_event( - record, "DNS_NAME", module=module, source=source_event - ) - # if it's a hostname and it's only one hop away, mark it as affiliate - if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: - child_event.add_tag("affiliate") - host_hash = hash(str(child_event.host)) - if in_dns_scope or self.preset.in_scope(child_event): - dns_child_events.append(child_event) - except ValidationError as e: - log.warning( - f'Event validation failed for DNS child of {source_event}: "{record}" ({rdtype}): {e}' - ) - for child_event in dns_child_events: - log.debug(f"Queueing DNS child for {event}: {child_event}") - self.queue_event(child_event) - except ValidationError as e: log.warning(f"Event validation failed with kwargs={kwargs}: {e}") log.trace(traceback.format_exc()) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index e2b813cbc5..4884461031 100644 --- a/bbot/scanner/scanner.py +++ 
b/bbot/scanner/scanner.py @@ -182,9 +182,6 @@ def __init__( # scope distance self.scope_search_distance = max(0, int(self.config.get("scope_search_distance", 0))) - self.scope_dns_search_distance = max( - self.scope_search_distance, int(self.config.get("scope_dns_search_distance", 1)) - ) self.scope_report_distance = int(self.config.get("scope_report_distance", 1)) # url file extensions From 07a4cbdbde5cd38539d6888be2d79615d26125ca Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 9 Apr 2024 02:11:05 -0400 Subject: [PATCH 20/63] continued work on hooks --- bbot/core/helpers/dns/dns.py | 36 ------- bbot/core/helpers/dns/engine.py | 20 +--- bbot/defaults.yml | 9 +- bbot/modules/internal/dnsresolve.py | 146 +++++++++++++++------------- bbot/scanner/manager.py | 13 +-- bbot/scanner/scanner.py | 2 +- 6 files changed, 96 insertions(+), 130 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 3988420482..821d7b6672 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -5,7 +5,6 @@ from cachetools import LRUCache from bbot.core.engine import EngineClient -from bbot.core.helpers.async_helpers import NamedLock from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host from .engine import DNSEngine @@ -35,7 +34,6 @@ class DNSHelper(EngineClient): wildcard_tests (int): Number of tests to be run for wildcard detection. Defaults to 5. _wildcard_cache (dict): Cache for wildcard detection results. _dns_cache (LRUCache): Cache for DNS resolution results, limited in size. - _event_cache (LRUCache): Cache for event resolution results, tags. Limited in size. resolver_file (Path): File containing system's current resolver nameservers. filter_bad_ptrs (bool): Whether to filter out DNS names that appear to be auto-generated PTR records. Defaults to True. 
@@ -70,10 +68,6 @@ def __init__(self, parent_helper): self.wildcard_ignore = [] self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore]) - # event resolution cache - self._event_cache = LRUCache(maxsize=10000) - self._event_cache_locks = NamedLock() - # copy the system's current resolvers to a text file for tool use self.system_resolvers = dns.resolver.Resolver().nameservers # TODO: DNS server speed test (start in background task) @@ -90,36 +84,6 @@ async def resolve_raw_batch(self, queries): async for _ in self.run_and_yield("resolve_raw_batch", queries=queries): yield _ - async def resolve_event(self, event, minimal=False): - # abort if the event doesn't have a host - if (not event.host) or (event.type in ("IP_RANGE",)): - # tags, whitelisted, blacklisted, children - return set(), dict() - - event_host = str(event.host) - event_type = str(event.type) - event_tags = set() - dns_children = dict() - - if (not event.host) or (event.type in ("IP_RANGE",)): - return event_tags, dns_children - - # lock to ensure resolution of the same host doesn't start while we're working here - async with self._event_cache_locks.lock(event_host): - # try to get data from cache - try: - _event_tags, _dns_children = self._event_cache[event_host] - event_tags.update(_event_tags) - except KeyError: - pass - - kwargs = {"event_host": event_host, "event_type": event_type, "minimal": minimal} - event_tags, dns_children = await self.run_and_return("resolve_event", **kwargs) - - self._event_cache[event_host] = (event_tags, dns_children) - - return event_tags, dns_children - async def is_wildcard(self, query, ips=None, rdtype=None): """ Use this method to check whether a *host* is a wildcard entry diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index abba661a81..491b433cdd 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -29,6 +29,8 @@ log = logging.getLogger("bbot.core.helpers.dns.engine.server") 
+all_rdtypes = ["A", "AAAA", "SRV", "MX", "NS", "SOA", "CNAME", "TXT"] + class DNSEngine(EngineServer): @@ -42,8 +44,6 @@ class DNSEngine(EngineServer): 99: "_mock_dns", } - all_rdtypes = ["A", "AAAA", "SRV", "MX", "NS", "SOA", "CNAME", "TXT"] - def __init__(self, socket_path, config={}): super().__init__(socket_path) @@ -443,7 +443,7 @@ async def resolve_event(self, event_host, event_type, minimal=False): types = ("PTR",) else: if event_type == "DNS_NAME" and not minimal: - types = self.all_rdtypes + types = all_rdtypes else: types = ("A", "AAAA") queries = [(event_host, t) for t in types] @@ -708,7 +708,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): parent = parent_domain(query) parents = list(domain_parents(query)) - rdtypes_to_check = [rdtype] if rdtype is not None else self.all_rdtypes + rdtypes_to_check = [rdtype] if rdtype is not None else all_rdtypes query_baseline = dict() # if the caller hasn't already done the work of resolving the IPs @@ -807,7 +807,7 @@ async def is_wildcard_domain(self, domain, log_info=False): log.debug(f"Skipping wildcard detection on {domain} because it is excluded in the config") return {} - rdtypes_to_check = set(self.all_rdtypes) + rdtypes_to_check = all_rdtypes # make a list of its parents parents = list(domain_parents(domain, include_self=True)) @@ -890,16 +890,6 @@ async def _connectivity_check(self, interval=5): self._errors.clear() return False - def _parse_rdtype(self, t, default=None): - if isinstance(t, str): - if t.strip().lower() in ("any", "all", "*"): - return self.all_rdtypes - else: - return [t.strip().upper()] - elif any([isinstance(t, x) for x in (list, tuple)]): - return [str(_).strip().upper() for _ in t] - return default - def debug(self, *args, **kwargs): if self._debug: log.trace(*args, **kwargs) diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 6255f0b717..5b6323ae43 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -8,8 +8,6 @@ home: ~/.bbot scope_report_distance: 0 # 
Generate new DNS_NAME and IP_ADDRESS events through DNS resolution dns_resolution: true -# Limit the number of BBOT threads -max_threads: 25 # Rate-limit DNS dns_queries_per_second: 1000 # Rate-limit HTTP @@ -42,12 +40,19 @@ scope_dns_search_distance: 1 # Limit how many DNS records can be followed in a row (stop malicious/runaway DNS records) dns_resolve_distance: 5 +# Limit the number of scan manager workers +manager_tasks: 5 + # Infer certain events from others, e.g. IPs from IP ranges, DNS_NAMEs from URLs, etc. speculate: True # Passively search event data for URLs, hostnames, emails, etc. excavate: True # Summarize activity at the end of a scan aggregate: True +# DNS resolution +dnsresolve: True +# Cloud provider tagging +cloudcheck: True # How to handle installation of module dependencies # Choices are: diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index 13a0e9f76a..dc8ba996b6 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -1,58 +1,81 @@ from contextlib import suppress +from cachetools import LRUCache from bbot.errors import ValidationError from bbot.modules.base import HookModule +from bbot.core.helpers.dns.engine import all_rdtypes +from bbot.core.helpers.async_helpers import NamedLock class dnsresolve(HookModule): watched_events = ["*"] _priority = 1 + _max_event_handlers = 25 async def setup(self): self.dns_resolution = self.scan.config.get("dns_resolution", False) - self.scope_search_distance = max(0, int(self.config.get("scope_search_distance", 0))) - self.scope_dns_search_distance = max(0, int(self.config.get("scope_dns_search_distance", 1))) - self.scope_distance_modifier = max(self.scope_search_distance, self.scope_dns_search_distance) + self.scope_search_distance = max(0, int(self.scan.config.get("scope_search_distance", 0))) + self.scope_dns_search_distance = max(0, int(self.scan.config.get("scope_dns_search_distance", 1))) + # event resolution cache + 
self._event_cache = LRUCache(maxsize=10000) + self._event_cache_locks = NamedLock() return True + @property + def scope_distance_modifier(self): + return max(self.scope_search_distance, self.scope_dns_search_distance) + async def filter_event(self, event): - if not event.host: + if (not event.host) or (event.type in ("IP_RANGE",)): return False, "event does not have host attribute" return True async def handle_event(self, event): - self.hugesuccess(event) - # skip DNS resolution if it's disabled in the config and the event is a target and we don't have a blacklist - # this is a performance optimization and it'd be nice if we could do it for all events not just targets - # but for non-target events, we need to know what IPs they resolve to so we can make scope decisions about them - skip_dns_resolution = (not self.dns_resolution) and "target" in event.tags and not self.scan.blacklist - if skip_dns_resolution: - dns_tags = {"resolved"} - dns_children = dict() - else: - # DNS resolution - dns_tags, dns_children = await self.helpers.dns.resolve_event(event, minimal=not self.dns_resolution) - - # whitelisting / blacklisting based on resolved hosts + dns_tags = set() + dns_children = dict() + + # DNS resolution event_whitelisted = False event_blacklisted = False - for rdtype, children in dns_children.items(): - self.hugeinfo(f"{event.host}: {rdtype}:{children}") - if event_blacklisted: - break - for host in children: - if rdtype in ("A", "AAAA", "CNAME"): - event.resolved_hosts.add(host) - # having a CNAME to an in-scope resource doesn't make you in-scope - if not event_whitelisted and rdtype != "CNAME": - with suppress(ValidationError): - if self.scan.whitelisted(host): - event_whitelisted = True - # CNAME to a blacklisted resources, means you're blacklisted - with suppress(ValidationError): - if self.scan.blacklisted(host): - event_blacklisted = True - break + + event_host = str(event.host) + event_host_hash = hash(str(event.host)) + + emit_children = event_host_hash 
not in self._event_cache + + async with self._event_cache_locks.lock(event_host_hash): + try: + # try to get from cache + dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] + except KeyError: + queries = [(event_host, rdtype) for rdtype in all_rdtypes] + async for (query, rdtype), (answers, errors) in self.helpers.dns.resolve_raw_batch(queries): + for answer, _rdtype in answers: + dns_tags.add(f"{rdtype.lower()}-record") + try: + dns_children[_rdtype].add(answer) + except KeyError: + dns_children[_rdtype] = {answer} + + # whitelisting / blacklisting based on resolved hosts + for rdtype, children in dns_children.items(): + if event_blacklisted: + break + for host in children: + if rdtype in ("A", "AAAA", "CNAME"): + event.resolved_hosts.add(host) + # having a CNAME to an in-scope resource doesn't make you in-scope + if not event_whitelisted and rdtype != "CNAME": + with suppress(ValidationError): + if self.scan.whitelisted(host): + event_whitelisted = True + # CNAME to a blacklisted resources, means you're blacklisted + with suppress(ValidationError): + if self.scan.blacklisted(host): + event_blacklisted = True + break + + self._event_cache[event_host_hash] = dns_tags, dns_children, event_whitelisted, event_blacklisted # kill runaway DNS chains dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) @@ -73,7 +96,6 @@ async def handle_event(self, event): if event_blacklisted: reason = "DNS associations" self.debug(f"Omitting due to blacklisted {reason}: {event}") - return if event_whitelisted: self.debug(f"Making {event} in-scope because it resolves to an in-scope resource") @@ -102,37 +124,29 @@ async def handle_event(self, event): source_event.scope_distance = event.scope_distance if "target" in event.tags: source_event.add_tag("target") - await self.emit_event(source_event) + self.scan.manager.queue_event(source_event) ### Emit DNS children ### - if self.dns_resolution: - self.hugesuccess(f"emitting children 
for {event}! (dns children: {event.dns_children})") - emit_children = True + if emit_children: in_dns_scope = -1 < event.scope_distance < self.scope_distance_modifier - self.critical(f"{event.host} in dns scope: {in_dns_scope}") - - if emit_children: - dns_child_events = [] - if event.dns_children: - for rdtype, records in event.dns_children.items(): - self.hugewarning(f"{event.host}: {rdtype}:{records}") - module = self.scan._make_dummy_module_dns(rdtype) - module._priority = 4 - for record in records: - try: - child_event = self.scan.make_event( - record, "DNS_NAME", module=module, source=source_event - ) - # if it's a hostname and it's only one hop away, mark it as affiliate - if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: - child_event.add_tag("affiliate") - host_hash = hash(str(child_event.host)) - if in_dns_scope or self.preset.in_scope(child_event): - dns_child_events.append(child_event) - except ValidationError as e: - self.warning( - f'Event validation failed for DNS child of {source_event}: "{record}" ({rdtype}): {e}' - ) - for child_event in dns_child_events: - self.debug(f"Queueing DNS child for {event}: {child_event}") - await self.emit_event(child_event) + dns_child_events = [] + if event.dns_children: + for rdtype, records in event.dns_children.items(): + module = self.scan._make_dummy_module_dns(rdtype) + module._priority = 4 + for record in records: + try: + child_event = self.scan.make_event(record, "DNS_NAME", module=module, source=source_event) + # if it's a hostname and it's only one hop away, mark it as affiliate + if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: + child_event.add_tag("affiliate") + host_hash = hash(str(child_event.host)) + if in_dns_scope or self.preset.in_scope(child_event): + dns_child_events.append(child_event) + except ValidationError as e: + self.warning( + f'Event validation failed for DNS child of {source_event}: "{record}" ({rdtype}): {e}' + ) + for child_event in 
dns_child_events: + self.debug(f"Queueing DNS child for {event}: {child_event}") + self.scan.manager.queue_event(child_event) diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index df56361cc9..45887a05d5 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -125,10 +125,9 @@ async def emit_event(self, event, *args, **kwargs): quick = (quick_kwarg or quick_event) and not callbacks_requested # skip event if it fails precheck - if event.type != "DNS_NAME": - acceptable = self._event_precheck(event) - if not acceptable: - return + acceptable = self._event_precheck(event) + if not acceptable: + return log.debug(f'Module "{event.module}" raised {event}') @@ -208,12 +207,6 @@ async def _emit_event(self, event, **kwargs): log.debug(f"Omitting blacklisted event: {event}") return - # For DNS_NAMEs, we've waited to do this until now, in case event.data changed during handle_wildcard_event() - if event.type == "DNS_NAME": - acceptable = self._event_precheck(event) - if not acceptable: - return - # Scope shepherding # here is where we make sure in-scope events are set to their proper scope distance event_whitelisted = self.scan.whitelisted(event) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 4884461031..42a199d577 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -165,7 +165,7 @@ def __init__( self._status = "NOT_STARTED" self._status_code = 0 - self.max_workers = max(1, self.config.get("max_threads", 25)) + self.max_workers = max(1, self.config.get("manager_tasks", 5)) self.modules = OrderedDict({}) self._modules_loaded = False From b1a8c2329011930382ffbf01100d89b831b6d348 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 9 Apr 2024 02:29:39 -0400 Subject: [PATCH 21/63] more work on hooks --- bbot/core/helpers/dns/engine.py | 79 ----------------------------- bbot/modules/internal/dnsresolve.py | 25 ++++++++- 2 files changed, 24 insertions(+), 80 deletions(-) diff --git 
a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 491b433cdd..5e5f2b81d0 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -408,85 +408,6 @@ async def handle_wildcard_event(self, event, children): finally: log.debug(f"Finished handle_wildcard_event({event}, children={children})") - async def resolve_event(self, event_host, event_type, minimal=False): - """ - Tag the given event with the appropriate DNS record types and optionally create child - events based on DNS resolutions. - - Args: - event (object): The event to be resolved and tagged. - minimal (bool, optional): If set to True, the function will perform minimal DNS - resolution. Defaults to False. - - Returns: - tuple: A 4-tuple containing the following items: - - event_tags (set): Set of tags for the event. - - dns_children (dict): Dictionary containing child events from DNS resolutions. - - Examples: - >>> event = make_event("evilcorp.com") - >>> resolve_event(event) - ({'resolved', 'ns-record', 'a-record',}, False, False, {'A': {IPv4Address('1.2.3.4'), IPv4Address('1.2.3.5')}, 'NS': {'ns1.evilcorp.com'}}) - - Note: - This method does not modify the passed in `event`. Instead, it returns data - that can be used to modify or act upon the `event`. 
- """ - log.debug(f"Resolving event {event_type}:{event_host}") - event_tags = set() - dns_children = dict() - - try: - types = () - if is_ip(event_host): - if not minimal: - types = ("PTR",) - else: - if event_type == "DNS_NAME" and not minimal: - types = all_rdtypes - else: - types = ("A", "AAAA") - queries = [(event_host, t) for t in types] - async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(queries): - if answers: - rdtype = str(rdtype).upper() - event_tags.add("resolved") - event_tags.add(f"{rdtype.lower()}-record") - - for host, _rdtype in answers: - if host: - host = make_ip_type(host) - - if self.filter_bad_ptrs and rdtype in ("PTR") and is_ptr(host): - self.debug(f"Filtering out bad PTR: {host}") - continue - - try: - dns_children[_rdtype].add(host) - except KeyError: - dns_children[_rdtype] = {host} - - elif errors: - event_tags.add(f"{rdtype.lower()}-error") - - # if needed, mark as unresolved - if not is_ip(event_host) and "resolved" not in event_tags: - event_tags.add("unresolved") - # check for private IPs - for rdtype, ips in dns_children.items(): - for ip in ips: - try: - ip = ipaddress.ip_address(ip) - if ip.is_private: - event_tags.add("private-ip") - except ValueError: - continue - - return event_tags, dns_children - - finally: - log.debug(f"Finished resolving event {event_type}:{event_host}") - def event_cache_get(self, host): """ Retrieves cached event data based on the given host. 
diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index dc8ba996b6..f7295235e0 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -1,3 +1,4 @@ +import ipaddress from contextlib import suppress from cachetools import LRUCache @@ -40,6 +41,7 @@ async def handle_event(self, event): event_host = str(event.host) event_host_hash = hash(str(event.host)) + event_is_ip = self.helpers.is_ip(event.host) emit_children = event_host_hash not in self._event_cache @@ -49,7 +51,10 @@ async def handle_event(self, event): dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] except KeyError: queries = [(event_host, rdtype) for rdtype in all_rdtypes] + error_rdtypes = [] async for (query, rdtype), (answers, errors) in self.helpers.dns.resolve_raw_batch(queries): + if errors: + error_rdtypes.append(rdtype) for answer, _rdtype in answers: dns_tags.add(f"{rdtype.lower()}-record") try: @@ -57,11 +62,21 @@ async def handle_event(self, event): except KeyError: dns_children[_rdtype] = {answer} - # whitelisting / blacklisting based on resolved hosts + for rdtype in error_rdtypes: + if rdtype not in dns_children: + dns_tags.add(f"{rdtype.lower()}-error") + + if not event_is_ip: + if dns_children: + dns_tags.add("resolved") + else: + dns_tags.add("unresolved") + for rdtype, children in dns_children.items(): if event_blacklisted: break for host in children: + # whitelisting / blacklisting based on resolved hosts if rdtype in ("A", "AAAA", "CNAME"): event.resolved_hosts.add(host) # having a CNAME to an in-scope resource doesn't make you in-scope @@ -75,6 +90,14 @@ async def handle_event(self, event): event_blacklisted = True break + # check for private IPs + try: + ip = ipaddress.ip_address(host) + if ip.is_private: + dns_tags.add("private-ip") + except ValueError: + continue + self._event_cache[event_host_hash] = dns_tags, dns_children, event_whitelisted, event_blacklisted 
# kill runaway DNS chains From 490bd36a0896b517e582aeb824d2bb8194ac1ab3 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 9 Apr 2024 15:46:26 -0400 Subject: [PATCH 22/63] more wip hooks --- bbot/core/helpers/dns/dns.py | 102 ++++---------------- bbot/core/helpers/dns/engine.py | 140 +++------------------------- bbot/modules/base.py | 9 +- bbot/modules/internal/dnsresolve.py | 107 +++++++++++++++------ bbot/scanner/manager.py | 2 +- bbot/test/test_step_1/test_dns.py | 88 +++++++++++------ 6 files changed, 180 insertions(+), 268 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 821d7b6672..cc0a1ff4a5 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -2,7 +2,6 @@ import logging import dns.exception import dns.asyncresolver -from cachetools import LRUCache from bbot.core.engine import EngineClient from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host @@ -119,20 +118,10 @@ async def is_wildcard(self, query, ips=None, rdtype=None): if [ips, rdtype].count(None) == 1: raise ValueError("Both ips and rdtype must be specified") - # skip if query isn't a dns name - if not is_dns_name(query): + query = self._wildcard_prevalidation(query) + if not query: return {} - # skip check if the query's parent domain is excluded in the config - for d in self.wildcard_ignore: - if host_in_host(query, d): - log.debug(f"Skipping wildcard detection on {query} because it is excluded in the config") - return {} - - query = clean_dns_record(query) - # skip check if it's an IP or a plain hostname - if is_ip(query) or not "." 
in query: - return {} # skip check if the query is a domain if is_domain(query): return {} @@ -140,80 +129,29 @@ async def is_wildcard(self, query, ips=None, rdtype=None): return await self.run_and_return("is_wildcard", query=query, ips=ips, rdtype=rdtype) async def is_wildcard_domain(self, domain, log_info=False): - return await self.run_and_return("is_wildcard_domain", domain=domain, log_info=False) + domain = self._wildcard_prevalidation(domain) + if not domain: + return {} - async def handle_wildcard_event(self, event): - """ - Used within BBOT's scan manager to detect and tag DNS wildcard events. + return await self.run_and_return("is_wildcard_domain", domain=domain, log_info=False) - Wildcards are detected for every major record type. If a wildcard is detected, its data - is overwritten, for example: `_wildcard.evilcorp.com`. + def _wildcard_prevalidation(self, host): + host = clean_dns_record(host) + # skip check if it's an IP or a plain hostname + if is_ip(host) or not "." in host: + return False - Args: - event (Event): The event to check for wildcards. + # skip if query isn't a dns name + if not is_dns_name(host): + return False - Returns: - None: This method modifies the `event` in place and does not return a value. + # skip check if the query's parent domain is excluded in the config + for d in self.wildcard_ignore: + if host_in_host(host, d): + log.debug(f"Skipping wildcard detection on {host} because it is excluded in the config") + return False - Examples: - >>> handle_wildcard_event(event) - # The `event` might now have tags like ["wildcard", "a-wildcard", "aaaa-wildcard"] and - # its `data` attribute might be modified to "_wildcard.evilcorp.com" if it was detected - # as a wildcard. 
- """ - log.debug(f"Entering handle_wildcard_event({event}, children={event.dns_children})") - try: - event_host = str(event.host) - # wildcard checks - if not is_ip(event.host): - # check if the dns name itself is a wildcard entry - wildcard_rdtypes = await self.is_wildcard(event_host) - for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): - wildcard_tag = "error" - if is_wildcard == True: - event.add_tag("wildcard") - wildcard_tag = "wildcard" - event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") - - # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) - if (not is_ip(event.host)) and event.dns_children: - if wildcard_rdtypes: - # these are the rdtypes that successfully resolve - resolved_rdtypes = set([c.upper() for c in event.dns_children]) - # these are the rdtypes that have wildcards - wildcard_rdtypes_set = set(wildcard_rdtypes) - # consider the event a full wildcard if all its records are wildcards - event_is_wildcard = False - if resolved_rdtypes: - event_is_wildcard = all(r in wildcard_rdtypes_set for r in resolved_rdtypes) - - if event_is_wildcard: - if event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): - wildcard_parent = self.parent_helper.parent_domain(event_host) - for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items(): - if _is_wildcard: - wildcard_parent = _parent_domain - break - wildcard_data = f"_wildcard.{wildcard_parent}" - if wildcard_data != event.data: - log.debug( - f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"' - ) - event.data = wildcard_data - - # TODO: transplant this - # tag wildcard domains for convenience - # elif is_domain(event_host) or hash(event_host) in self._wildcard_cache: - # event_target = "target" in event.tags - # wildcard_domain_results = await self.is_wildcard_domain(event_host, log_info=event_target) - # for hostname, wildcard_domain_rdtypes in wildcard_domain_results.items(): - # if wildcard_domain_rdtypes: - # 
event.add_tag("wildcard-domain") - # for rdtype, ips in wildcard_domain_rdtypes.items(): - # event.add_tag(f"{rdtype.lower()}-wildcard-domain") - - finally: - log.debug(f"Finished handle_wildcard_event({event}, children={event.dns_children})") + return host async def _mock_dns(self, mock_data): from .mock import MockResolver diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 5e5f2b81d0..b8e184264b 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -3,27 +3,21 @@ import time import asyncio import logging -import ipaddress import traceback -from contextlib import suppress from cachetools import LRUCache +from contextlib import suppress from ..regexes import dns_name_regex from bbot.errors import DNSWildcardBreak from bbot.core.engine import EngineServer from bbot.core.helpers.async_helpers import NamedLock from bbot.core.helpers.misc import ( - clean_dns_record, - parent_domain, - domain_parents, is_ip, - is_domain, - is_ptr, - is_dns_name, - host_in_host, - make_ip_type, - smart_decode, rand_string, + smart_decode, + parent_domain, + domain_parents, + clean_dns_record, ) @@ -36,11 +30,10 @@ class DNSEngine(EngineServer): CMDS = { 0: "resolve", - 1: "resolve_event", - 2: "resolve_batch", - 3: "resolve_raw_batch", - 4: "is_wildcard", - 5: "is_wildcard_domain", + 1: "resolve_batch", + 2: "resolve_raw_batch", + 3: "is_wildcard", + 4: "is_wildcard_domain", 99: "_mock_dns", } @@ -336,107 +329,6 @@ async def _resolve_ip(self, query, **kwargs): return results, errors - async def handle_wildcard_event(self, event, children): - """ - Used within BBOT's scan manager to detect and tag DNS wildcard events. - - Wildcards are detected for every major record type. If a wildcard is detected, its data - is overwritten, for example: `_wildcard.evilcorp.com`. - - Args: - event (object): The event to check for wildcards. - children (list): A list of the event's resulting DNS children after resolution. 
- - Returns: - None: This method modifies the `event` in place and does not return a value. - - Examples: - >>> handle_wildcard_event(event, children) - # The `event` might now have tags like ["wildcard", "a-wildcard", "aaaa-wildcard"] and - # its `data` attribute might be modified to "_wildcard.evilcorp.com" if it was detected - # as a wildcard. - """ - log.debug(f"Entering handle_wildcard_event({event}, children={children})") - try: - event_host = str(event.host) - event_is_ip = is_ip(event_host) - # wildcard checks - if not event_is_ip: - # check if the dns name itself is a wildcard entry - wildcard_rdtypes = await self.is_wildcard(event_host) - for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): - wildcard_tag = "error" - if is_wildcard == True: - event.add_tag("wildcard") - wildcard_tag = "wildcard" - event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") - - # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) - if not event_is_ip and children: - if wildcard_rdtypes: - # these are the rdtypes that successfully resolve - resolved_rdtypes = set([c.upper() for c in children]) - # these are the rdtypes that have wildcards - wildcard_rdtypes_set = set(wildcard_rdtypes) - # consider the event a full wildcard if all its records are wildcards - event_is_wildcard = False - if resolved_rdtypes: - event_is_wildcard = all(r in wildcard_rdtypes_set for r in resolved_rdtypes) - - if event_is_wildcard: - if event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): - wildcard_parent = parent_domain(event_host) - for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items(): - if _is_wildcard: - wildcard_parent = _parent_domain - break - wildcard_data = f"_wildcard.{wildcard_parent}" - if wildcard_data != event.data: - log.debug( - f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"' - ) - event.data = wildcard_data - # tag wildcard domains for convenience - elif is_domain(event_host) or 
hash(event_host) in self._wildcard_cache: - event_target = "target" in event.tags - wildcard_domain_results = await self.is_wildcard_domain(event_host, log_info=event_target) - for hostname, wildcard_domain_rdtypes in wildcard_domain_results.items(): - if wildcard_domain_rdtypes: - event.add_tag("wildcard-domain") - for rdtype, ips in wildcard_domain_rdtypes.items(): - event.add_tag(f"{rdtype.lower()}-wildcard-domain") - finally: - log.debug(f"Finished handle_wildcard_event({event}, children={children})") - - def event_cache_get(self, host): - """ - Retrieves cached event data based on the given host. - - Args: - host (str): The host for which the event data is to be retrieved. - - Returns: - tuple: A 4-tuple containing the following items: - - event_tags (set): Set of tags for the event. - - dns_children (set): Set containing child events from DNS resolutions. - - Examples: - Assuming an event with host "www.evilcorp.com" has been cached: - - >>> event_cache_get("www.evilcorp.com") - ({"resolved", "a-record"}, False, False, {'1.2.3.4'}) - - Assuming no event with host "www.notincache.com" has been cached: - - >>> event_cache_get("www.notincache.com") - (set(), set()) - """ - try: - event_tags, dns_children = self._event_cache[host] - return (event_tags, dns_children) - except KeyError: - return set(), set() - async def resolve_batch(self, queries, threads=10, **kwargs): """ A helper to execute a bunch of DNS requests. 
@@ -717,18 +609,8 @@ async def is_wildcard_domain(self, domain, log_info=False): {} """ wildcard_domain_results = {} - domain = clean_dns_record(domain) - - if not is_dns_name(domain): - return {} - - # skip check if the query's parent domain is excluded in the config - for d in self.wildcard_ignore: - if host_in_host(domain, d): - log.debug(f"Skipping wildcard detection on {domain} because it is excluded in the config") - return {} - rdtypes_to_check = all_rdtypes + rdtypes_to_check = set(all_rdtypes) # make a list of its parents parents = list(domain_parents(domain, include_self=True)) @@ -751,7 +633,7 @@ async def is_wildcard_domain(self, domain, log_info=False): wildcard_results = dict() queries = [] - for rdtype in list(rdtypes_to_check): + for rdtype in rdtypes_to_check: for _ in range(self.wildcard_tests): rand_query = f"{rand_string(digits=False, length=10)}.{host}" queries.append((rand_query, rdtype)) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 89db38fd40..52d52d656b 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -1467,10 +1467,11 @@ async def _worker(self): self.debug(f"Finished hooking {event}") - if pass_on_event is False: - self.debug(f"Not passing on {event} because {pass_on_event_reason}") - else: - await self.outgoing_event_queue.put((event, _kwargs)) + if pass_on_event is False: + self.debug(f"Not passing on {event} because {pass_on_event_reason}") + return + + await self.outgoing_event_queue.put((event, _kwargs)) except asyncio.CancelledError: self.log.trace("Worker cancelled") diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index f7295235e0..9ff52e15e1 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -17,9 +17,11 @@ async def setup(self): self.dns_resolution = self.scan.config.get("dns_resolution", False) self.scope_search_distance = max(0, int(self.scan.config.get("scope_search_distance", 0))) self.scope_dns_search_distance = 
max(0, int(self.scan.config.get("scope_dns_search_distance", 1))) + # event resolution cache self._event_cache = LRUCache(maxsize=10000) self._event_cache_locks = NamedLock() + return True @property @@ -34,8 +36,6 @@ async def filter_event(self, event): async def handle_event(self, event): dns_tags = set() dns_children = dict() - - # DNS resolution event_whitelisted = False event_blacklisted = False @@ -43,13 +43,17 @@ async def handle_event(self, event): event_host_hash = hash(str(event.host)) event_is_ip = self.helpers.is_ip(event.host) + # only emit DNS children if we haven't seen this host before emit_children = event_host_hash not in self._event_cache + # we do DNS resolution inside a lock to make sure we don't duplicate work + # once the resolution happens, it will be cached so it doesn't need to happen again async with self._event_cache_locks.lock(event_host_hash): try: # try to get from cache dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] except KeyError: + # if missing from cache, do DNS resolution queries = [(event_host, rdtype) for rdtype in all_rdtypes] error_rdtypes = [] async for (query, rdtype), (answers, errors) in self.helpers.dns.resolve_raw_batch(queries): @@ -78,7 +82,6 @@ async def handle_event(self, event): for host in children: # whitelisting / blacklisting based on resolved hosts if rdtype in ("A", "AAAA", "CNAME"): - event.resolved_hosts.add(host) # having a CNAME to an in-scope resource doesn't make you in-scope if not event_whitelisted and rdtype != "CNAME": with suppress(ValidationError): @@ -87,6 +90,7 @@ async def handle_event(self, event): # CNAME to a blacklisted resources, means you're blacklisted with suppress(ValidationError): if self.scan.blacklisted(host): + dns_tags.add("blacklisted") event_blacklisted = True break @@ -98,8 +102,32 @@ async def handle_event(self, event): except ValueError: continue + # store results in cache self._event_cache[event_host_hash] = dns_tags, 
dns_children, event_whitelisted, event_blacklisted + # abort if the event resolves to something blacklisted + if event_blacklisted: + event.add_tag("blacklisted") + return False, f"blacklisted DNS record" + + # set resolved_hosts attribute + for rdtype, children in dns_children.items(): + for host in children: + event.resolved_hosts.add(host) + + # set dns_children attribute + event.dns_children = dns_children + + # if the event resolves to an in-scope IP, set its scope distance to 0 + if event_whitelisted: + self.debug(f"Making {event} in-scope because it resolves to an in-scope resource") + event.scope_distance = 0 + + # check for wildcards, only if the event resolves to something isn't an IP + if (not event_is_ip) and (dns_children): + if event.scope_distance <= self.scan.scope_search_distance: + await self.handle_wildcard_event(event) + # kill runaway DNS chains dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) if dns_resolve_distance >= self.helpers.dns.max_dns_resolve_distance: @@ -108,31 +136,20 @@ async def handle_event(self, event): ) dns_children = {} + # if the event is a DNS_NAME or IP, tag with "a-record", "ptr-record", etc. 
if event.type in ("DNS_NAME", "IP_ADDRESS"): - event.dns_children = dns_children for tag in dns_tags: event.add_tag(tag) - if event_blacklisted: - event.add_tag("blacklisted") - reason = "event host" - if event_blacklisted: - reason = "DNS associations" - self.debug(f"Omitting due to blacklisted {reason}: {event}") - - if event_whitelisted: - self.debug(f"Making {event} in-scope because it resolves to an in-scope resource") - event.scope_distance = 0 - - # DNS_NAME --> DNS_NAME_UNRESOLVED + # If the event is unresolved, change its type to DNS_NAME_UNRESOLVED if event.type == "DNS_NAME" and "unresolved" in event.tags and not "target" in event.tags: event.type = "DNS_NAME_UNRESOLVED" - - # check for wildcards - if event.scope_distance <= self.scan.scope_search_distance: - if not "unresolved" in event.tags: - if not self.helpers.is_ip_type(event.host): - await self.helpers.dns.handle_wildcard_event(event) + else: + # otherwise, check for wildcards + if event.scope_distance <= self.scan.scope_search_distance: + if not "unresolved" in event.tags: + if not self.helpers.is_ip_type(event.host): + await self.helpers.dns.handle_wildcard_event(event) # speculate DNS_NAMES and IP_ADDRESSes from other event types source_event = event @@ -149,12 +166,12 @@ async def handle_event(self, event): source_event.add_tag("target") self.scan.manager.queue_event(source_event) - ### Emit DNS children ### + # emit DNS children if emit_children: in_dns_scope = -1 < event.scope_distance < self.scope_distance_modifier dns_child_events = [] - if event.dns_children: - for rdtype, records in event.dns_children.items(): + if dns_children: + for rdtype, records in dns_children.items(): module = self.scan._make_dummy_module_dns(rdtype) module._priority = 4 for record in records: @@ -163,7 +180,6 @@ async def handle_event(self, event): # if it's a hostname and it's only one hop away, mark it as affiliate if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: 
child_event.add_tag("affiliate") - host_hash = hash(str(child_event.host)) if in_dns_scope or self.preset.in_scope(child_event): dns_child_events.append(child_event) except ValidationError as e: @@ -173,3 +189,42 @@ async def handle_event(self, event): for child_event in dns_child_events: self.debug(f"Queueing DNS child for {event}: {child_event}") self.scan.manager.queue_event(child_event) + + async def handle_wildcard_event(self, event): + self.debug(f"Entering handle_wildcard_event({event}, children={event.dns_children})") + try: + event_host = str(event.host) + # check if the dns name itself is a wildcard entry + wildcard_rdtypes = await self.helpers.is_wildcard(event_host) + for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): + wildcard_tag = "error" + if is_wildcard == True: + event.add_tag("wildcard") + wildcard_tag = "wildcard" + event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") + + # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) + if wildcard_rdtypes: + # these are the rdtypes that successfully resolve + resolved_rdtypes = set([c.upper() for c in event.dns_children]) + # these are the rdtypes that have wildcards + wildcard_rdtypes_set = set(wildcard_rdtypes) + # consider the event a full wildcard if all its records are wildcards + event_is_wildcard = False + if resolved_rdtypes: + event_is_wildcard = all(r in wildcard_rdtypes_set for r in resolved_rdtypes) + + if event_is_wildcard: + if event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): + wildcard_parent = self.helpers.parent_domain(event_host) + for rdtype, (_is_wildcard, _parent_domain) in wildcard_rdtypes.items(): + if _is_wildcard: + wildcard_parent = _parent_domain + break + wildcard_data = f"_wildcard.{wildcard_parent}" + if wildcard_data != event.data: + self.debug(f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"') + event.data = wildcard_data + + finally: + self.debug(f"Finished 
handle_wildcard_event({event}, children={event.dns_children})") diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 45887a05d5..dd964825c3 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -152,7 +152,7 @@ def _event_precheck(self, event): if event._dummy: log.warning(f"Cannot emit dummy event: {event}") return False - if event == event.get_source(): + if (not event.type == "SCAN") and (event == event.get_source()): log.debug(f"Skipping event with self as source: {event}") return False if event._graph_important: diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 07beca1f25..f5f528ac34 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -31,7 +31,7 @@ async def test_dns_engine(bbot_scanner): @pytest.mark.asyncio -async def test_dns(bbot_scanner): +async def test_dns_resolution(bbot_scanner): scan = bbot_scanner("1.1.1.1") from bbot.core.helpers.dns.engine import DNSEngine @@ -108,21 +108,22 @@ async def test_dns(bbot_scanner): assert not hash(f"one.one.one.one:A") in dnsengine._dns_cache # Ensure events with hosts have resolved_hosts attribute populated - resolved_hosts_event1 = scan.make_event("one.one.one.one", "DNS_NAME", dummy=True) - resolved_hosts_event2 = scan.make_event("http://one.one.one.one/", "URL_UNVERIFIED", dummy=True) - assert resolved_hosts_event1.host not in scan.helpers.dns._event_cache - assert resolved_hosts_event2.host not in scan.helpers.dns._event_cache - event_tags1, event_whitelisted1, event_blacklisted1, children1 = await scan.helpers.resolve_event( - resolved_hosts_event1 - ) - assert resolved_hosts_event1.host in scan.helpers.dns._event_cache - assert resolved_hosts_event2.host in scan.helpers.dns._event_cache - event_tags2, event_whitelisted2, event_blacklisted2, children2 = await scan.helpers.resolve_event( - resolved_hosts_event2 - ) - assert "1.1.1.1" in [str(x) for x in children1["A"]] - assert "1.1.1.1" in [str(x) for x 
in children2["A"]] - assert set(children1.keys()) == set(children2.keys()) + await scan._prep() + resolved_hosts_event1 = scan.make_event("one.one.one.one", "DNS_NAME", source=scan.root_event) + resolved_hosts_event2 = scan.make_event("http://one.one.one.one/", "URL_UNVERIFIED", source=scan.root_event) + dnsresolve = scan.modules["dnsresolve"] + assert hash(resolved_hosts_event1.host) not in dnsresolve._event_cache + assert hash(resolved_hosts_event2.host) not in dnsresolve._event_cache + await dnsresolve.handle_event(resolved_hosts_event1) + assert hash(resolved_hosts_event1.host) in dnsresolve._event_cache + assert hash(resolved_hosts_event2.host) in dnsresolve._event_cache + await dnsresolve.handle_event(resolved_hosts_event2) + assert "1.1.1.1" in resolved_hosts_event2.resolved_hosts + assert "1.1.1.1" in resolved_hosts_event2.dns_children["A"] + assert resolved_hosts_event1.resolved_hosts == resolved_hosts_event2.resolved_hosts + assert resolved_hosts_event1.dns_children == resolved_hosts_event2.dns_children + assert "a-record" in resolved_hosts_event1.tags + assert not "a-record" in resolved_hosts_event2.tags scan2 = bbot_scanner("evilcorp.com", config={"dns_resolution": True}) await scan2.helpers.dns._mock_dns( @@ -178,12 +179,11 @@ async def test_wildcards(bbot_scanner): wildcard_event3 = scan.make_event("github.io", "DNS_NAME", dummy=True) # event resolution - event_tags1, event_whitelisted1, event_blacklisted1, children1 = await scan.helpers.resolve_event(wildcard_event1) - event_tags2, event_whitelisted2, event_blacklisted2, children2 = await scan.helpers.resolve_event(wildcard_event2) - event_tags3, event_whitelisted3, event_blacklisted3, children3 = await scan.helpers.resolve_event(wildcard_event3) - await helpers.handle_wildcard_event(wildcard_event1, children1) - await helpers.handle_wildcard_event(wildcard_event2, children2) - await helpers.handle_wildcard_event(wildcard_event3, children3) + await scan._prep() + dnsresolve = 
scan.modules["dnsresolve"] + await dnsresolve.handle_event(wildcard_event1) + await dnsresolve.handle_event(wildcard_event2) + await dnsresolve.handle_event(wildcard_event3) assert "wildcard" in wildcard_event1.tags assert "a-wildcard" in wildcard_event1.tags assert "srv-wildcard" not in wildcard_event1.tags @@ -192,7 +192,43 @@ async def test_wildcards(bbot_scanner): assert "srv-wildcard" not in wildcard_event2.tags assert wildcard_event1.data == "_wildcard.github.io" assert wildcard_event2.data == "_wildcard.github.io" - # TODO: re-enable this? - # assert "wildcard-domain" in wildcard_event3.tags - # assert "a-wildcard-domain" in wildcard_event3.tags - # assert "srv-wildcard-domain" not in wildcard_event3.tags + assert wildcard_event3.data == "github.io" + + from bbot.scanner import Scanner + + # test with full scan + scan2 = Scanner("asdfl.gashdgkjsadgsdf.github.io", config={"dnsresolve": True}) + events = [e async for e in scan2.async_start()] + assert len(events) == 2 + assert 1 == len([e for e in events if e.type == "SCAN"]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "_wildcard.github.io" + and all( + t in e.tags + for t in ( + "a-record", + "target", + "aaaa-wildcard", + "resolved", + "in-scope", + "subdomain", + "aaaa-record", + "wildcard", + "a-wildcard", + ) + ) + ] + ) + + # test with full scan (wildcard detection disabled for domain) + scan2 = Scanner("asdfl.gashdgkjsadgsdf.github.io", config={"dns_wildcard_ignore": ["github.io"]}) + events = [e async for e in scan2.async_start()] + assert len(events) == 2 + for e in events: + log.critical(e) + # assert 1 == len([e for e in events if e.type == "SCAN"]) + # assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "asdfl.gashdgkjsadgsdf.github.io" and all(t in e.tags for t in ('a-record', 'target', 'resolved', 'in-scope', 'subdomain', 'aaaa-record')) and not any(t in e.tags for t in ("wildcard", "a-wildcard", "aaaa-wildcard"))]) From 
34b5d3e5a6353ee47d399fcab2badd16989b3270 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 10 Apr 2024 23:37:31 -0400 Subject: [PATCH 23/63] more work on hooks --- bbot/cli.py | 4 +- bbot/modules/base.py | 72 +- bbot/modules/internal/cloudcheck.py | 34 + bbot/modules/internal/dnsresolve.py | 12 +- bbot/scanner/manager.py | 616 +++++------------- bbot/scanner/scanner.py | 190 +++++- .../test_manager_scope_accuracy.py | 4 +- 7 files changed, 397 insertions(+), 535 deletions(-) create mode 100644 bbot/modules/internal/cloudcheck.py diff --git a/bbot/cli.py b/bbot/cli.py index 910b078d68..8e308d6f88 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -184,12 +184,12 @@ def handle_keyboard_input(keyboard_input): module = kill_match.group("module") if module in scan.modules: log.hugewarning(f'Killing module: "{module}"') - scan.manager.kill_module(module, message="killed by user") + scan.kill_module(module, message="killed by user") else: log.warning(f'Invalid module: "{module}"') else: scan.preset.core.logger.toggle_log_level(logger=log) - scan.manager.modules_status(_log=True) + scan.modules_status(_log=True) reader = asyncio.StreamReader() protocol = asyncio.StreamReaderProtocol(reader) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 52d52d656b..dbf3f4ce4b 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -640,6 +640,8 @@ async def _worker(self): def max_scope_distance(self): if self.in_scope_only or self.target_only: return 0 + if self.scope_distance_modifier is None: + return 999 return max(0, self.scan.scope_search_distance + self.scope_distance_modifier) def _event_precheck(self, event): @@ -775,7 +777,7 @@ async def _cleanup(self): async with self.scan._acatch(context), self._task_counter.count(context): await self.helpers.execute_sync_or_async(callback) - async def queue_event(self, event, precheck=True): + async def queue_event(self, event): """ Asynchronously queues an incoming event to the module's event queue for further 
processing. @@ -798,9 +800,7 @@ async def queue_event(self, event, precheck=True): if self.incoming_event_queue is False: self.debug(f"Not in an acceptable state to queue incoming event") return - acceptable, reason = True, "precheck was skipped" - if precheck: - acceptable, reason = self._event_precheck(event) + acceptable, reason = self._event_precheck(event) if not acceptable: if reason and reason != "its type is not in watched_events": self.debug(f"Not queueing {event} because {reason}") @@ -812,7 +812,7 @@ async def queue_event(self, event, precheck=True): async with self._event_received: self._event_received.notify() if event.type != "FINISHED": - self.scan.manager._new_activity = True + self.scan._new_activity = True except AttributeError: self.debug(f"Not in an acceptable state to queue incoming event") @@ -1407,23 +1407,18 @@ class HookModule(BaseModule): suppress_dupes = False _hook = True - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._first = False - async def _worker(self): async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): try: while not self.scan.stopping and not self.errored: - try: if self.incoming_event_queue is not False: incoming = await self.get_incoming_event() try: - event, _kwargs = incoming + event, kwargs = incoming except ValueError: event = incoming - _kwargs = {} + kwargs = {} else: self.debug(f"Event queue is in bad state") break @@ -1453,25 +1448,25 @@ async def _worker(self): # whether to pass the event on to the rest of the scan # defaults to true, unless handle_event returns False - pass_on_event = True - pass_on_event_reason = "" + forward_event = True + forward_event_reason = "" if acceptable: - context = f"{self.name}.handle_event({event})" + context = f"{self.name}.handle_event({event, kwargs})" self.scan.stats.event_consumed(event, self) self.debug(f"Hooking {event}") async with self.scan._acatch(context), self._task_counter.count(context): - pass_on_event = await 
self.handle_event(event) + forward_event = await self.handle_event(event, kwargs) with suppress(ValueError, TypeError): - pass_on_event, pass_on_event_reason = pass_on_event + forward_event, forward_event_reason = forward_event self.debug(f"Finished hooking {event}") - if pass_on_event is False: - self.debug(f"Not passing on {event} because {pass_on_event_reason}") - return + if forward_event is False: + self.debug(f"Not forwarding {event} because {forward_event_reason}") + continue - await self.outgoing_event_queue.put((event, _kwargs)) + await self.forward_event(event, kwargs) except asyncio.CancelledError: self.log.trace("Worker cancelled") @@ -1479,18 +1474,33 @@ async def _worker(self): self.log.trace(f"Worker stopped") async def get_incoming_event(self): - try: - return self.incoming_event_queue.get_nowait() - except asyncio.queues.QueueEmpty: - if self._first: - return self.scan.manager.get_event_from_modules() - raise + """ + Get an event from this module's incoming event queue + """ + return await self.incoming_event_queue.get() - async def queue_event(self, event, precheck=False): + async def forward_event(self, event, kwargs): + """ + Used for forwarding the event on to the next hook module + """ + await self.outgoing_event_queue.put((event, kwargs)) + + async def queue_outgoing_event(self, event, **kwargs): + """ + Used by emit_event() to raise new events to the scan + """ + # if this was a normal module, we'd put it in the outgoing queue + # but because it's a hook module, we need to queue it with the first hook module + await self.scan.ingress_module.queue_event(event, kwargs) + + async def queue_event(self, event, kwargs=None): + """ + Put an event in this module's incoming event queue + """ + if kwargs is None: + kwargs = {} try: - self.incoming_event_queue.put_nowait(event) - if event.type != "FINISHED": - self.scan.manager._new_activity = True + self.incoming_event_queue.put_nowait((event, kwargs)) except AttributeError: self.debug(f"Not in an 
acceptable state to queue incoming event") diff --git a/bbot/modules/internal/cloudcheck.py b/bbot/modules/internal/cloudcheck.py new file mode 100644 index 0000000000..85dca28aaa --- /dev/null +++ b/bbot/modules/internal/cloudcheck.py @@ -0,0 +1,34 @@ +from bbot.modules.base import HookModule + + +class cloudcheck(HookModule): + watched_events = ["*"] + scope_distance_modifier = 1 + _priority = 3 + + async def filter_event(self, event): + if (not event.host) or (event.type in ("IP_RANGE",)): + return False, "event does not have host attribute" + return True + + async def handle_event(self, event, kwargs): + + # skip if we're in tests + if self.helpers.in_tests: + return + + # cloud tagging by main host + await self.scan.helpers.cloud.tag_event(event) + + # cloud tagging by resolved hosts + to_check = set() + if event.type == "IP_ADDRESS": + to_check.add(event.host) + for rdtype, hosts in event.dns_children.items(): + if rdtype in ("A", "AAAA"): + for host in hosts: + to_check.add(host) + for host in to_check: + provider, provider_type, subnet = self.helpers.cloudcheck(host) + if provider: + event.add_tag(f"{provider_type}-{provider}") diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index 9ff52e15e1..e1771382d3 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -33,7 +33,7 @@ async def filter_event(self, event): return False, "event does not have host attribute" return True - async def handle_event(self, event): + async def handle_event(self, event, kwargs): dns_tags = set() dns_children = dict() event_whitelisted = False @@ -144,12 +144,6 @@ async def handle_event(self, event): # If the event is unresolved, change its type to DNS_NAME_UNRESOLVED if event.type == "DNS_NAME" and "unresolved" in event.tags and not "target" in event.tags: event.type = "DNS_NAME_UNRESOLVED" - else: - # otherwise, check for wildcards - if event.scope_distance <= self.scan.scope_search_distance: - if not 
"unresolved" in event.tags: - if not self.helpers.is_ip_type(event.host): - await self.helpers.dns.handle_wildcard_event(event) # speculate DNS_NAMES and IP_ADDRESSes from other event types source_event = event @@ -164,7 +158,7 @@ async def handle_event(self, event): source_event.scope_distance = event.scope_distance if "target" in event.tags: source_event.add_tag("target") - self.scan.manager.queue_event(source_event) + await self.emit_event(source_event) # emit DNS children if emit_children: @@ -188,7 +182,7 @@ async def handle_event(self, event): ) for child_event in dns_child_events: self.debug(f"Queueing DNS child for {event}: {child_event}") - self.scan.manager.queue_event(child_event) + await self.emit_event(child_event) async def handle_wildcard_event(self, event): self.debug(f"Entering handle_wildcard_event({event}, children={event.dns_children})") diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index dd964825c3..267175b212 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -1,84 +1,35 @@ import asyncio import logging -import traceback from contextlib import suppress -from ..errors import ValidationError -from ..core.helpers.async_helpers import TaskCounter, ShuffleQueue +from bbot.modules.base import HookModule log = logging.getLogger("bbot.scanner.manager") -class ScanManager: +class ScanIngress(HookModule): """ - Manages the modules, event queues, and overall event flow during a scan. - - Simultaneously serves as a policeman, judge, jury, and executioner for events. - It is responsible for managing the incoming event queue and distributing events to modules. - - Attributes: - scan (Scan): Reference to the Scan object that instantiated the ScanManager. - incoming_event_queue (ShuffleQueue): Queue storing incoming events for processing. - events_distributed (set): Set tracking globally unique events. - events_accepted (set): Set tracking events accepted by individual modules. 
- dns_resolution (bool): Flag to enable or disable DNS resolution. - _task_counter (TaskCounter): Counter for ongoing tasks. - _new_activity (bool): Flag indicating new activity. - _modules_by_priority (dict): Modules sorted by their priorities. - _incoming_queues (list): List of incoming event queues from each module. - _module_priority_weights (list): Weight values for each module based on priority. + This is always the first hook module in the chain, responsible for basic scope checks """ - def __init__(self, scan): - """ - Initializes the ScanManager object, setting up essential attributes for scan management. - - Args: - scan (Scan): Reference to the Scan object that instantiated the ScanManager. - """ + watched_events = ["*"] + # accept all events regardless of scope distance + scope_distance_modifier = None + _name = "_scan_ingress" - self.scan = scan - self.preset = scan.preset + @property + def priority(self): + # we are the highest priority + return -99 - self.incoming_event_queue = ShuffleQueue() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._module_priority_weights = None + self._non_hook_modules = None # track incoming duplicates module-by-module (for `suppress_dupes` attribute of modules) self.incoming_dup_tracker = set() - # track outgoing duplicates (for `accept_dupes` attribute of modules) - self.outgoing_dup_tracker = set() - self.dns_resolution = self.scan.config.get("dns_resolution", False) - self._task_counter = TaskCounter() - self._new_activity = True - self._modules_by_priority = None - self._hook_modules = None - self._non_hook_modules = None - self._incoming_queues = None - self._module_priority_weights = None - async def _worker_loop(self): - try: - while not self.scan.stopped: - try: - async with self._task_counter.count("get_event_from_modules()"): - # if we have hooks set up, we always get events from the last (lowest priority) hook module. 
- if self.hook_modules: - last_hook_module = self.hook_modules[-1] - event, kwargs = last_hook_module.outgoing_event_queue.get_nowait() - else: - # otherwise, we go through all the modules - event, kwargs = self.get_event_from_modules() - except asyncio.queues.QueueEmpty: - await asyncio.sleep(0.1) - continue - async with self._task_counter.count(f"emit_event({event})"): - emit_event_task = asyncio.create_task( - self.emit_event(event, **kwargs), name=f"emit_event({event})" - ) - await emit_event_task - - except Exception: - log.critical(traceback.format_exc()) - - async def init_events(self): + async def init_events(self, events): """ Initializes events by seeding the scanner with target events and distributing them for further processing. @@ -86,11 +37,8 @@ async def init_events(self): - This method populates the event queue with initial target events. - It also marks the Scan object as finished with initialization by setting `_finished_init` to True. """ - - context = f"manager.init_events()" - async with self.scan._acatch(context), self._task_counter.count(context): - - sorted_events = sorted(self.scan.target.events, key=lambda e: len(e.data)) + async with self.scan._acatch(self.init_events), self._task_counter.count(self.init_events): + sorted_events = sorted(events, key=lambda e: len(e.data)) for event in [self.scan.root_event] + sorted_events: event._dummy = False event.scope_distance = 0 @@ -100,146 +48,67 @@ async def init_events(self): event.source = self.scan.root_event if event.module is None: event.module = self.scan._make_dummy_module(name="TARGET", _type="TARGET") - self.scan.verbose(f"Target: {event}") - if self.hook_modules: - first_hook_module = self.hook_modules[0] - await first_hook_module.queue_event(event) - else: - self.queue_event(event) + self.verbose(f"Target: {event}") + await self.queue_event(event, {}) await asyncio.sleep(0.1) self.scan._finished_init = True - async def emit_event(self, event, *args, **kwargs): - """ - TODO: Register + 
kill duplicate events immediately? - bbot.scanner: scan._event_thread_pool: running for 0 seconds: ScanManager._emit_event(DNS_NAME("sipfed.online.lync.com")) - bbot.scanner: scan._event_thread_pool: running for 0 seconds: ScanManager._emit_event(DNS_NAME("sipfed.online.lync.com")) - bbot.scanner: scan._event_thread_pool: running for 0 seconds: ScanManager._emit_event(DNS_NAME("sipfed.online.lync.com")) - """ - callbacks = ["abort_if", "on_success_callback"] - callbacks_requested = any([kwargs.get(k, None) is not None for k in callbacks]) - # "quick" queues the event immediately - # This is used by speculate - quick_kwarg = kwargs.pop("quick", False) - quick_event = getattr(event, "quick_emit", False) - quick = (quick_kwarg or quick_event) and not callbacks_requested - - # skip event if it fails precheck - acceptable = self._event_precheck(event) - if not acceptable: - return - - log.debug(f'Module "{event.module}" raised {event}') - - if quick: - log.debug(f"Quick-emitting {event}") - for kwarg in callbacks: - kwargs.pop(kwarg, None) - async with self.scan._acatch(context=self.distribute_event): - await self.distribute_event(event) - else: - async with self.scan._acatch(context=self._emit_event): - await self._emit_event( - event, - *args, - **kwargs, - ) - - def _event_precheck(self, event): - """ - Check an event to see if we can skip it to save on performance - """ + async def handle_event(self, event, kwargs): + # don't accept dummy events if event._dummy: - log.warning(f"Cannot emit dummy event: {event}") - return False + return False, "cannot emit dummy event" + + # don't accept events with self as source if (not event.type == "SCAN") and (event == event.get_source()): - log.debug(f"Skipping event with self as source: {event}") - return False - if event._graph_important: - return True - if self.is_incoming_duplicate(event, add=True): - log.debug(f"Skipping event because it was already emitted by its module: {event}") - return False - return True + return 
False, "event's source is itself" - async def _emit_event(self, event, **kwargs): - """ - Handles the emission, tagging, and distribution of a events during a scan. + # don't accept duplicates + if (not event._graph_important) and self.is_incoming_duplicate(event, add=True): + return False, "event was already emitted by its module" - A lot of really important stuff happens here. Actually this is probably the most - important method in all of BBOT. It is basically the central intersection that - every event passes through. + # update event's scope distance based on its parent + event.scope_distance = event.source.scope_distance + 1 - It exists in a delicate balance. Close to half of my debugging time has been spent - in this function. I have slain many dragons here and there may still be more yet to slay. + # blacklist rejections + event_blacklisted = self.scan.blacklisted(event) + if event_blacklisted or "blacklisted" in event.tags: + return False, f"Omitting blacklisted event: {event}" - Tread carefully, friend. -TheTechromancer + # Scope shepherding + # here is where we make sure in-scope events are set to their proper scope distance + event_whitelisted = self.scan.whitelisted(event) + if event.host and event_whitelisted: + log.debug(f"Making {event} in-scope because it matches the scan target") + event.scope_distance = 0 - Notes: - - Central function for decision-making in BBOT. - - Conducts DNS resolution, tagging, and scope calculations. - - Checks against whitelists and blacklists. - - Calls custom callbacks. - - Handles DNS wildcard events. - - Decides on event acceptance and distribution. - - Parameters: - event (Event): The event object to be emitted. - **kwargs: Arbitrary keyword arguments (e.g., `on_success_callback`, `abort_if`). - - Side Effects: - - Event tagging. - - Populating DNS data. - - Emitting new events. - - Queueing events for further processing. - - Adjusting event scopes. - - Running callbacks. - - Updating scan statistics. 
- """ - log.debug(f"Emitting {event}") - try: - on_success_callback = kwargs.pop("on_success_callback", None) - abort_if = kwargs.pop("abort_if", None) - - # blacklist rejections - event_blacklisted = self.scan.blacklisted(event) - if event_blacklisted or "blacklisted" in event.tags: - log.debug(f"Omitting blacklisted event: {event}") - return - - # Scope shepherding - # here is where we make sure in-scope events are set to their proper scope distance - event_whitelisted = self.scan.whitelisted(event) - if event.host and event_whitelisted: - log.debug(f"Making {event} in-scope because it matches the scan target") - event.scope_distance = 0 + # nerf event's priority if it's not in scope + event.module_priority += event.scope_distance + + @property + def non_hook_modules(self): + if self._non_hook_modules is None: + self._non_hook_modules = [m for m in self.scan.modules.values() if not m._hook] + return self._non_hook_modules - # now that the event is properly tagged, we can finally make decisions about it - abort_result = False - if callable(abort_if): - async with self.scan._acatch(context=abort_if): - abort_result = await self.scan.helpers.execute_sync_or_async(abort_if, event) - msg = f"{event.module}: not raising event {event} due to custom criteria in abort_if()" - with suppress(ValueError, TypeError): - abort_result, reason = abort_result - msg += f": {reason}" - if abort_result: - log.verbose(msg) - return - - # run success callback before distributing event (so it can add tags, etc.) 
- if callable(on_success_callback): - async with self.scan._acatch(context=on_success_callback): - await self.scan.helpers.execute_sync_or_async(on_success_callback, event) - - await self.distribute_event(event) - - except ValidationError as e: - log.warning(f"Event validation failed with kwargs={kwargs}: {e}") - log.trace(traceback.format_exc()) - - finally: - log.debug(f"{event.module}.emit_event() finished for {event}") + @property + def incoming_queues(self): + return [self.incoming_event_queue] + [m.outgoing_event_queue for m in self.non_hook_modules] + + @property + def module_priority_weights(self): + if not self._module_priority_weights: + # we subtract from six because lower priorities == higher weights + priorities = [5] + [6 - m.priority for m in self.non_hook_modules] + self._module_priority_weights = priorities + return self._module_priority_weights + + async def get_incoming_event(self): + for q in self.helpers.weighted_shuffle(self.incoming_queues, self.module_priority_weights): + try: + return q.get_nowait() + except (asyncio.queues.QueueEmpty, AttributeError): + continue + raise asyncio.queues.QueueEmpty() def is_incoming_duplicate(self, event, add=False): """ @@ -258,6 +127,89 @@ def is_incoming_duplicate(self, event, add=False): return True return False + +class ScanEgress(HookModule): + """ + This is always the last hook module in the chain, responsible for executing and acting on the + `abort_if` and `on_success_callback` functions. 
+ """ + + watched_events = ["*"] + # accept all events regardless of scope distance + scope_distance_modifier = None + _name = "_scan_egress" + + @property + def priority(self): + # we are the lowest priority + return 99 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # track outgoing duplicates (for `accept_dupes` attribute of modules) + self.outgoing_dup_tracker = set() + + async def handle_event(self, event, kwargs): + abort_if = kwargs.pop("abort_if", None) + on_success_callback = kwargs.pop("on_success_callback", None) + + # make event internal if it's above our configured report distance + event_in_report_distance = event.scope_distance <= self.scan.scope_report_distance + event_will_be_output = event.always_emit or event_in_report_distance + if not event_will_be_output: + log.debug( + f"Making {event} internal because its scope_distance ({event.scope_distance}) > scope_report_distance ({self.scan.scope_report_distance})" + ) + event.internal = True + + # if we discovered something interesting from an internal event, + # make sure we preserve its chain of parents + source = event.source + if source.internal and ((not event.internal) or event._graph_important): + source_in_report_distance = source.scope_distance <= self.scan.scope_report_distance + if source_in_report_distance: + source.internal = False + if not source._graph_important: + source._graph_important = True + log.debug(f"Re-queuing internal event {source} with parent {event}") + self.queue_event(source) + + abort_result = False + if callable(abort_if): + async with self.scan._acatch(context=abort_if): + abort_result = await self.scan.helpers.execute_sync_or_async(abort_if, event) + msg = f"{event.module}: not raising event {event} due to custom criteria in abort_if()" + with suppress(ValueError, TypeError): + abort_result, reason = abort_result + msg += f": {reason}" + if abort_result: + return False, msg + + # run success callback before distributing event (so it can 
add tags, etc.) + if callable(on_success_callback): + async with self.scan._acatch(context=on_success_callback): + await self.scan.helpers.execute_sync_or_async(on_success_callback, event) + + async def forward_event(self, event, kwargs): + """ + Queue event with modules + """ + is_outgoing_duplicate = self.is_outgoing_duplicate(event) + if is_outgoing_duplicate: + self.verbose(f"{event.module}: Duplicate event: {event}") + # absorb event into the word cloud if it's in scope + if not is_outgoing_duplicate and -1 < event.scope_distance < 1: + self.scan.word_cloud.absorb_event(event) + + for mod in self.scan.modules.values(): + # don't distribute events to hook modules + if mod._hook: + continue + acceptable_dup = (not is_outgoing_duplicate) or mod.accept_dupes + graph_important = mod._is_graph_important(event) + if acceptable_dup or graph_important: + await mod.queue_event(event) + def is_outgoing_duplicate(self, event, add=False): """ Calculate whether an event is a duplicate in the context of the whole scan, @@ -270,269 +222,3 @@ def is_outgoing_duplicate(self, event, add=False): if add: self.outgoing_dup_tracker.add(event_hash) return is_dup - - async def distribute_event(self, event): - """ - Queue event with modules - """ - async with self.scan._acatch(context=self.distribute_event): - # make event internal if it's above our configured report distance - event_in_report_distance = event.scope_distance <= self.scan.scope_report_distance - event_will_be_output = event.always_emit or event_in_report_distance - if not event_will_be_output: - log.debug( - f"Making {event} internal because its scope_distance ({event.scope_distance}) > scope_report_distance ({self.scan.scope_report_distance})" - ) - event.internal = True - - # if we discovered something interesting from an internal event, - # make sure we preserve its chain of parents - source = event.source - if source.internal and ((not event.internal) or event._graph_important): - source_in_report_distance = 
source.scope_distance <= self.scan.scope_report_distance - if source_in_report_distance: - source.internal = False - if not source._graph_important: - source._graph_important = True - log.debug(f"Re-queuing internal event {source} with parent {event}") - self.queue_event(source) - - is_outgoing_duplicate = self.is_outgoing_duplicate(event) - if is_outgoing_duplicate: - self.scan.verbose(f"{event.module}: Duplicate event: {event}") - # absorb event into the word cloud if it's in scope - if not is_outgoing_duplicate and -1 < event.scope_distance < 1: - self.scan.word_cloud.absorb_event(event) - for mod in self.scan.modules.values(): - # don't distribute events to hook modules - if mod._hook: - continue - acceptable_dup = (not is_outgoing_duplicate) or mod.accept_dupes - # graph_important = mod._type == "output" and event._graph_important == True - graph_important = mod._is_graph_important(event) - if acceptable_dup or graph_important: - await mod.queue_event(event) - - def kill_module(self, module_name, message=None): - from signal import SIGINT - - module = self.scan.modules[module_name] - module.set_error_state(message=message, clear_outgoing_queue=True) - for proc in module._proc_tracker: - with suppress(Exception): - proc.send_signal(SIGINT) - self.scan.helpers.cancel_tasks_sync(module._tasks) - - @property - def modules_by_priority(self): - if not self._modules_by_priority: - self._modules_by_priority = sorted(list(self.scan.modules.values()), key=lambda m: m.priority) - return self._modules_by_priority - - @property - def incoming_queues(self): - if not self._incoming_queues: - queues_by_priority = [m.outgoing_event_queue for m in self.modules_by_priority if not m._hook] - self._incoming_queues = [self.incoming_event_queue] + queues_by_priority - return self._incoming_queues - - @property - def incoming_qsize(self): - incoming_events = 0 - for q in self.incoming_queues: - incoming_events += q.qsize() - return incoming_events - - @property - def 
module_priority_weights(self): - if not self._module_priority_weights: - # we subtract from six because lower priorities == higher weights - priorities = [5] + [6 - m.priority for m in self.modules_by_priority if not m._hook] - self._module_priority_weights = priorities - return self._module_priority_weights - - @property - def hook_modules(self): - if self._hook_modules is None: - self._hook_modules = [m for m in self.modules_by_priority if m._hook] - if self._hook_modules: - self._hook_modules[0]._first = True - return self._hook_modules - - @property - def non_hook_modules(self): - if self._non_hook_modules is None: - self._non_hook_modules = [m for m in self.modules_by_priority if not m._hook] - return self._non_hook_modules - - def get_event_from_modules(self): - for q in self.scan.helpers.weighted_shuffle(self.incoming_queues, self.module_priority_weights): - try: - return q.get_nowait() - except (asyncio.queues.QueueEmpty, AttributeError): - continue - raise asyncio.queues.QueueEmpty() - - @property - def queued_event_types(self): - event_types = {} - for q in self.incoming_queues: - for event, _ in q._queue: - event_type = getattr(event, "type", None) - if event_type is not None: - try: - event_types[event_type] += 1 - except KeyError: - event_types[event_type] = 1 - return event_types - - def queue_event(self, event, **kwargs): - if event: - # nerf event's priority if it's likely not to be in scope - if event.scope_distance > 0: - event_in_scope = self.scan.whitelisted(event) and not self.scan.blacklisted(event) - if not event_in_scope: - event.module_priority += event.scope_distance - # update event's scope distance based on its parent - event.scope_distance = event.source.scope_distance + 1 - self.incoming_event_queue.put_nowait((event, kwargs)) - - @property - def running(self): - active_tasks = self._task_counter.value - incoming_events = self.incoming_qsize - return active_tasks > 0 or incoming_events > 0 - - @property - def modules_finished(self): - 
finished_modules = [m.finished for m in self.scan.modules.values()] - return all(finished_modules) - - @property - def active(self): - return self.running or not self.modules_finished - - def modules_status(self, _log=False): - finished = True - status = {"modules": {}} - - for m in self.scan.modules.values(): - mod_status = m.status - if mod_status["running"]: - finished = False - status["modules"][m.name] = mod_status - - for mod in self.scan.modules.values(): - if mod.errored and mod.incoming_event_queue not in [None, False]: - with suppress(Exception): - mod.set_error_state() - - status["finished"] = finished - - modules_errored = [m for m, s in status["modules"].items() if s["errored"]] - - max_mem_percent = 90 - mem_status = self.scan.helpers.memory_status() - # abort if we don't have the memory - mem_percent = mem_status.percent - if mem_percent > max_mem_percent: - free_memory = mem_status.available - free_memory_human = self.scan.helpers.bytes_to_human(free_memory) - self.scan.warning(f"System memory is at {mem_percent:.1f}% ({free_memory_human} remaining)") - - if _log: - modules_status = [] - for m, s in status["modules"].items(): - running = s["running"] - incoming = s["events"]["incoming"] - outgoing = s["events"]["outgoing"] - tasks = s["tasks"] - total = sum([incoming, outgoing, tasks]) - if running or total > 0: - modules_status.append((m, running, incoming, outgoing, tasks, total)) - modules_status.sort(key=lambda x: x[-1], reverse=True) - - if modules_status: - modules_status_str = ", ".join([f"{m}({i:,}:{t:,}:{o:,})" for m, r, i, o, t, _ in modules_status]) - self.scan.info( - f"{self.scan.name}: Modules running (incoming:processing:outgoing) {modules_status_str}" - ) - else: - self.scan.info(f"{self.scan.name}: No modules running") - event_type_summary = sorted( - self.scan.stats.events_emitted_by_type.items(), key=lambda x: x[-1], reverse=True - ) - if event_type_summary: - self.scan.info( - f'{self.scan.name}: Events produced so far: {", 
".join([f"{k}: {v}" for k,v in event_type_summary])}' - ) - else: - self.scan.info(f"{self.scan.name}: No events produced yet") - - if modules_errored: - self.scan.verbose( - f'{self.scan.name}: Modules errored: {len(modules_errored):,} ({", ".join([m for m in modules_errored])})' - ) - - queued_events_by_type = [(k, v) for k, v in self.queued_event_types.items() if v > 0] - if queued_events_by_type: - queued_events_by_type.sort(key=lambda x: x[-1], reverse=True) - queued_events_by_type_str = ", ".join(f"{m}: {t:,}" for m, t in queued_events_by_type) - num_queued_events = sum(v for k, v in queued_events_by_type) - self.scan.info( - f"{self.scan.name}: {num_queued_events:,} events in queue ({queued_events_by_type_str})" - ) - else: - self.scan.info(f"{self.scan.name}: No events in queue") - - if self.scan.log_level <= logging.DEBUG: - # status debugging - scan_active_status = [] - scan_active_status.append(f"scan._finished_init: {self.scan._finished_init}") - scan_active_status.append(f"manager.active: {self.active}") - scan_active_status.append(f" manager.running: {self.running}") - scan_active_status.append(f" manager._task_counter.value: {self._task_counter.value}") - scan_active_status.append(f" manager._task_counter.tasks:") - for task in list(self._task_counter.tasks.values()): - scan_active_status.append(f" - {task}:") - scan_active_status.append( - f" manager.incoming_event_queue.qsize: {self.incoming_event_queue.qsize()}" - ) - scan_active_status.append(f" manager.modules_finished: {self.modules_finished}") - for m in sorted(self.scan.modules.values(), key=lambda m: m.name): - running = m.running - scan_active_status.append(f" {m}.finished: {m.finished}") - scan_active_status.append(f" running: {running}") - if running: - scan_active_status.append(f" tasks:") - for task in list(m._task_counter.tasks.values()): - scan_active_status.append(f" - {task}:") - scan_active_status.append(f" incoming_queue_size: {m.num_incoming_events}") - 
scan_active_status.append(f" outgoing_queue_size: {m.outgoing_event_queue.qsize()}") - for line in scan_active_status: - self.scan.debug(line) - - # log module memory usage - module_memory_usage = [] - for module in self.scan.modules.values(): - memory_usage = module.memory_usage - module_memory_usage.append((module.name, memory_usage)) - module_memory_usage.sort(key=lambda x: x[-1], reverse=True) - self.scan.debug(f"MODULE MEMORY USAGE:") - for module_name, usage in module_memory_usage: - self.scan.debug(f" - {module_name}: {self.scan.helpers.bytes_to_human(usage)}") - - # Uncomment these lines to enable debugging of event queues - - # queued_events = self.incoming_event_queue.events - # if queued_events: - # queued_events_str = ", ".join(str(e) for e in queued_events) - # self.scan.verbose(f"Queued events: {queued_events_str}") - # queued_events_by_module = [(k, v) for k, v in self.incoming_event_queue.modules.items() if v > 0] - # queued_events_by_module.sort(key=lambda x: x[-1], reverse=True) - # queued_events_by_module_str = ", ".join(f"{m}: {t:,}" for m, t in queued_events_by_module) - # self.scan.verbose(f"{self.scan.name}: Queued events by module: {queued_events_by_module_str}") - - status.update({"modules_errored": len(modules_errored)}) - - return status diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 42a199d577..13b3bfde84 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -17,9 +17,9 @@ from .preset import Preset from .stats import ScanStats -from .manager import ScanManager from .dispatcher import Dispatcher from bbot.core.event import make_event +from .manager import ScanIngress, ScanEgress from bbot.core.helpers.misc import sha1, rand_string from bbot.core.helpers.names_generator import random_name from bbot.core.helpers.async_helpers import async_to_sync_gen @@ -74,7 +74,6 @@ class Scanner: helpers (ConfigAwareHelper): Helper containing various reusable functions, regexes, etc. 
(alias to `self.preset.helpers`). output_dir (pathlib.Path): Output directory for scan (alias to `self.preset.output_dir`). name (str): Name of scan (alias to `self.preset.scan_name`). - manager (ScanManager): Coordinates and monitors the flow of events between modules during a scan. dispatcher (Dispatcher): Triggers certain events when the scan `status` changes. modules (dict): Holds all loaded modules in this format: `{"module_name": Module()}`. stats (ScanStats): Holds high-level scan statistics such as how many events have been produced and consumed by each module. @@ -177,7 +176,6 @@ def __init__( self.dispatcher = dispatcher self.dispatcher.set_scan(self) - self.manager = ScanManager(self) self.stats = ScanStats(self) # scope distance @@ -200,6 +198,7 @@ def __init__( self._prepped = False self._finished_init = False + self._new_activity = False self._cleanedup = False self.__loop = None @@ -308,17 +307,12 @@ async def async_start(self): await self.dispatcher.on_start(self) - # start manager worker loops - self._manager_worker_loop_tasks = [ - asyncio.create_task(self.manager._worker_loop()) for _ in range(self.max_workers) - ] - self.status = "RUNNING" self._start_modules() self.verbose(f"{len(self.modules):,} modules started") # distribute seed events - self.init_events_task = asyncio.create_task(self.manager.init_events()) + self.init_events_task = asyncio.create_task(self.ingress_module.init_events(self.target.events)) # main scan loop while 1: @@ -334,7 +328,7 @@ async def async_start(self): yield e # break if initialization finished and the scan is no longer active - if self._finished_init and not self.manager.active: + if self._finished_init and self.modules_finished: new_activity = await self.finish() if not new_activity: break @@ -385,16 +379,6 @@ async def async_start(self): def _start_modules(self): self.verbose(f"Starting module worker loops") - - # hook modules get sewn together like human centipede - if len(self.manager.hook_modules) > 1: - for 
i, hook_module in enumerate(self.manager.hook_modules[:-1]): - next_hook_module = self.manager.hook_modules[i + 1] - self.debug( - f"Setting hook module {hook_module.name}.outgoing_event_queue to next hook module {next_hook_module.name}.incoming_event_queue" - ) - hook_module._outgoing_event_queue = next_hook_module.incoming_event_queue - for module in self.modules.values(): module.start() @@ -520,9 +504,164 @@ async def load_modules(self): f"Loaded {len(loaded_output_modules):,}/{len(self.preset.output_modules):,} output modules, ({','.join(loaded_output_modules)})" ) - self.modules = OrderedDict(sorted(self.modules.items(), key=lambda x: getattr(x[-1], "_priority", 0))) + # builtin hook modules + self.ingress_module = ScanIngress(self) + self.egress_module = ScanEgress(self) + self.modules[self.ingress_module.name] = self.ingress_module + self.modules[self.egress_module.name] = self.egress_module + + # sort modules by priority + self.modules = OrderedDict(sorted(self.modules.items(), key=lambda x: getattr(x[-1], "priority", 3))) + + self.critical(list(self.modules)) + + # hook modules get sewn together like human centipede + self.hook_modules = [m for m in self.modules.values() if m._hook] + for i, hook_module in enumerate(self.hook_modules[:-1]): + next_hook_module = self.hook_modules[i + 1] + self.debug( + f"Setting hook module {hook_module.name}.outgoing_event_queue to next hook module {next_hook_module.name}.incoming_event_queue" + ) + hook_module._outgoing_event_queue = next_hook_module.incoming_event_queue + self._modules_loaded = True + @property + def modules_finished(self): + finished_modules = [m.finished for m in self.modules.values()] + return all(finished_modules) + + def kill_module(self, module_name, message=None): + from signal import SIGINT + + module = self.modules[module_name] + module.set_error_state(message=message, clear_outgoing_queue=True) + for proc in module._proc_tracker: + with contextlib.suppress(Exception): + proc.send_signal(SIGINT) 
+ self.helpers.cancel_tasks_sync(module._tasks) + + @property + def queued_event_types(self): + event_types = {} + queues = set() + + for module in self.modules.values(): + queues.add(module.incoming_event_queue) + queues.add(module.outgoing_event_queue) + + for q in queues: + for event, _ in q._queue: + event_type = getattr(event, "type", None) + if event_type is not None: + try: + event_types[event_type] += 1 + except KeyError: + event_types[event_type] = 1 + + return event_types + + def modules_status(self, _log=False): + finished = True + status = {"modules": {}} + + sorted_modules = [] + for module_name, module in self.modules.items(): + # if module_name.startswith("_"): + # continue + sorted_modules.append(module) + mod_status = module.status + if mod_status["running"]: + finished = False + status["modules"][module_name] = mod_status + + # sort modules by name + sorted_modules.sort(key=lambda m: m.name) + + status["finished"] = finished + + modules_errored = [m for m, s in status["modules"].items() if s["errored"]] + + max_mem_percent = 90 + mem_status = self.helpers.memory_status() + # abort if we don't have the memory + mem_percent = mem_status.percent + if mem_percent > max_mem_percent: + free_memory = mem_status.available + free_memory_human = self.helpers.bytes_to_human(free_memory) + self.warning(f"System memory is at {mem_percent:.1f}% ({free_memory_human} remaining)") + + if _log: + modules_status = [] + for m, s in status["modules"].items(): + running = s["running"] + incoming = s["events"]["incoming"] + outgoing = s["events"]["outgoing"] + tasks = s["tasks"] + total = sum([incoming, outgoing, tasks]) + if running or total > 0: + modules_status.append((m, running, incoming, outgoing, tasks, total)) + modules_status.sort(key=lambda x: x[-1], reverse=True) + + if modules_status: + modules_status_str = ", ".join([f"{m}({i:,}:{t:,}:{o:,})" for m, r, i, o, t, _ in modules_status]) + self.info(f"{self.name}: Modules running (incoming:processing:outgoing) 
{modules_status_str}") + else: + self.info(f"{self.name}: No modules running") + event_type_summary = sorted(self.stats.events_emitted_by_type.items(), key=lambda x: x[-1], reverse=True) + if event_type_summary: + self.info( + f'{self.name}: Events produced so far: {", ".join([f"{k}: {v}" for k,v in event_type_summary])}' + ) + else: + self.info(f"{self.name}: No events produced yet") + + if modules_errored: + self.verbose( + f'{self.name}: Modules errored: {len(modules_errored):,} ({", ".join([m for m in modules_errored])})' + ) + + queued_events_by_type = [(k, v) for k, v in self.queued_event_types.items() if v > 0] + if queued_events_by_type: + queued_events_by_type.sort(key=lambda x: x[-1], reverse=True) + queued_events_by_type_str = ", ".join(f"{m}: {t:,}" for m, t in queued_events_by_type) + num_queued_events = sum(v for k, v in queued_events_by_type) + self.info(f"{self.name}: {num_queued_events:,} events in queue ({queued_events_by_type_str})") + else: + self.info(f"{self.name}: No events in queue") + + if self.log_level <= logging.DEBUG: + # status debugging + scan_active_status = [] + scan_active_status.append(f"scan._finished_init: {self._finished_init}") + scan_active_status.append(f"scan.modules_finished: {self.modules_finished}") + for m in sorted_modules: + running = m.running + scan_active_status.append(f" {m}.finished: {m.finished}") + scan_active_status.append(f" running: {running}") + if running: + scan_active_status.append(f" tasks:") + for task in list(m._task_counter.tasks.values()): + scan_active_status.append(f" - {task}:") + scan_active_status.append(f" incoming_queue_size: {m.num_incoming_events}") + scan_active_status.append(f" outgoing_queue_size: {m.outgoing_event_queue.qsize()}") + for line in scan_active_status: + self.debug(line) + + # log module memory usage + module_memory_usage = [] + for module in sorted_modules: + memory_usage = module.memory_usage + module_memory_usage.append((module.name, memory_usage)) + 
module_memory_usage.sort(key=lambda x: x[-1], reverse=True) + self.debug(f"MODULE MEMORY USAGE:") + for module_name, usage in module_memory_usage: + self.debug(f" - {module_name}: {self.helpers.bytes_to_human(usage)}") + + status.update({"modules_errored": len(modules_errored)}) + + return status + def stop(self): """Stops the in-progress scan and performs necessary cleanup. @@ -555,8 +694,8 @@ async def finish(self): This method alters the scan's status to "FINISHING" if new activity is detected. """ # if new events were generated since last time we were here - if self.manager._new_activity: - self.manager._new_activity = False + if self._new_activity: + self._new_activity = False self.status = "FINISHING" # Trigger .finished() on every module and start over log.info("Finishing scan") @@ -587,9 +726,6 @@ def _drain_queues(self): while 1: if module.outgoing_event_queue: module.outgoing_event_queue.get_nowait() - with contextlib.suppress(asyncio.queues.QueueEmpty): - while 1: - self.manager.incoming_event_queue.get_nowait() self.debug("Finished draining queues") def _cancel_tasks(self): @@ -997,7 +1133,7 @@ async def _status_ticker(self, interval=15): async with self._acatch(): while 1: await asyncio.sleep(interval) - self.manager.modules_status(_log=True) + self.modules_status(_log=True) @contextlib.asynccontextmanager async def _acatch(self, context="scan", finally_callback=None, unhandled_is_critical=False): diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 2d4fece4fe..da1b87037b 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -177,7 +177,9 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test2.notrealzies"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) - 
assert len(all_events) == 9 + # assert len(all_events) == 9 + for e in all_events: + log.critical(e) assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal == True and e.scope_distance == 1]) assert 2 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal == True and e.scope_distance == 1]) From eae2dad3d12aed5cf684bc5f00d2a142ea6383a3 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 10 Apr 2024 23:38:54 -0400 Subject: [PATCH 24/63] remove debugging statement --- bbot/scanner/scanner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 13b3bfde84..a11ba49fbb 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -513,8 +513,6 @@ async def load_modules(self): # sort modules by priority self.modules = OrderedDict(sorted(self.modules.items(), key=lambda x: getattr(x[-1], "priority", 3))) - self.critical(list(self.modules)) - # hook modules get sewn together like human centipede self.hook_modules = [m for m in self.modules.values() if m._hook] for i, hook_module in enumerate(self.hook_modules[:-1]): From 17598f700735674005c6915e793fdf2fc7c3e99f Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sun, 14 Apr 2024 17:27:21 -0400 Subject: [PATCH 25/63] dns tests passing --- bbot/modules/base.py | 2 +- bbot/modules/internal/dnsresolve.py | 34 ++++++++++------ bbot/modules/sslcert.py | 7 +++- bbot/scanner/manager.py | 5 ++- bbot/scanner/scanner.py | 39 ++++++++++--------- .../test_manager_scope_accuracy.py | 13 +++---- 6 files changed, 59 insertions(+), 41 deletions(-) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index dbf3f4ce4b..0f8867201e 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -565,7 +565,7 @@ async def 
_setup(self): self.set_error_state(f"Unexpected error during module setup: {e}", critical=True) msg = f"{e}" self.trace() - return self.name, status, str(msg) + return self, status, str(msg) async def _worker(self): """ diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index e1771382d3..1056d93433 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -12,6 +12,7 @@ class dnsresolve(HookModule): watched_events = ["*"] _priority = 1 _max_event_handlers = 25 + scope_distance_modifier = None async def setup(self): self.dns_resolution = self.scan.config.get("dns_resolution", False) @@ -25,7 +26,7 @@ async def setup(self): return True @property - def scope_distance_modifier(self): + def _dns_search_distance(self): return max(self.scope_search_distance, self.scope_dns_search_distance) async def filter_event(self, event): @@ -43,8 +44,11 @@ async def handle_event(self, event, kwargs): event_host_hash = hash(str(event.host)) event_is_ip = self.helpers.is_ip(event.host) + # whether we've reached the max scope distance for dns + within_dns_search_distance = event.scope_distance < self._dns_search_distance + # only emit DNS children if we haven't seen this host before - emit_children = event_host_hash not in self._event_cache + emit_children = self.dns_resolution and event_host_hash not in self._event_cache # we do DNS resolution inside a lock to make sure we don't duplicate work # once the resolution happens, it will be cached so it doesn't need to happen again @@ -53,8 +57,16 @@ async def handle_event(self, event, kwargs): # try to get from cache dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] except KeyError: + if event_is_ip: + rdtypes_to_resolve = ["PTR"] + else: + if self.dns_resolution and within_dns_search_distance: + rdtypes_to_resolve = all_rdtypes + else: + rdtypes_to_resolve = ["A", "AAAA", "CNAME"] + # if missing from cache, do DNS 
resolution - queries = [(event_host, rdtype) for rdtype in all_rdtypes] + queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve] error_rdtypes = [] async for (query, rdtype), (answers, errors) in self.helpers.dns.resolve_raw_batch(queries): if errors: @@ -70,11 +82,10 @@ async def handle_event(self, event, kwargs): if rdtype not in dns_children: dns_tags.add(f"{rdtype.lower()}-error") - if not event_is_ip: - if dns_children: - dns_tags.add("resolved") - else: - dns_tags.add("unresolved") + if dns_children: + dns_tags.add("resolved") + elif not event_is_ip: + dns_tags.add("unresolved") for rdtype, children in dns_children.items(): if event_blacklisted: @@ -108,7 +119,7 @@ async def handle_event(self, event, kwargs): # abort if the event resolves to something blacklisted if event_blacklisted: event.add_tag("blacklisted") - return False, f"blacklisted DNS record" + return False, f"it has a blacklisted DNS record" # set resolved_hosts attribute for rdtype, children in dns_children.items(): @@ -152,7 +163,8 @@ async def handle_event(self, event, kwargs): and event.type not in ("DNS_NAME", "DNS_NAME_UNRESOLVED", "IP_ADDRESS", "IP_RANGE") and not (event.type in ("OPEN_TCP_PORT", "URL_UNVERIFIED") and str(event.module) == "speculate") ): - source_event = self.make_event(event.host, "DNS_NAME", source=event) + source_module = self.scan._make_dummy_module("host", _type="internal") + source_event = self.scan.make_event(event.host, "DNS_NAME", module=source_module, source=event) # only emit the event if it's not already in the parent chain if source_event is not None and source_event not in event.get_sources(): source_event.scope_distance = event.scope_distance @@ -162,7 +174,7 @@ async def handle_event(self, event, kwargs): # emit DNS children if emit_children: - in_dns_scope = -1 < event.scope_distance < self.scope_distance_modifier + in_dns_scope = -1 < event.scope_distance < self._dns_search_distance dns_child_events = [] if dns_children: for rdtype, records in 
dns_children.items(): diff --git a/bbot/modules/sslcert.py b/bbot/modules/sslcert.py index 357826920a..c6fec1ea97 100644 --- a/bbot/modules/sslcert.py +++ b/bbot/modules/sslcert.py @@ -78,8 +78,13 @@ async def handle_event(self, event): self.debug(f"Discovered new {event_type} via SSL certificate parsing: [{event_data}]") try: ssl_event = self.make_event(event_data, event_type, source=event, raise_error=True) + source_event = ssl_event.get_source() + if source_event.scope_distance == 0: + tags = ["affiliate"] + else: + tags = None if ssl_event: - await self.emit_event(ssl_event, on_success_callback=self.on_success_callback) + await self.emit_event(ssl_event, tags=tags) except ValidationError as e: self.hugeinfo(f'Malformed {event_type} "{event_data}" at {event.data}') self.debug(f"Invalid data at {host}:{port}: {e}") diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 267175b212..f390bb65b6 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -118,7 +118,8 @@ def is_incoming_duplicate(self, event, add=False): try: event_hash = event.module._outgoing_dedup_hash(event) except AttributeError: - event_hash = hash((event, str(getattr(event, "module", "")))) + module_name = str(getattr(event, "module", "")) + event_hash = hash((event, module_name)) is_dup = event_hash in self.incoming_dup_tracker if add: self.incoming_dup_tracker.add(event_hash) @@ -172,7 +173,7 @@ async def handle_event(self, event, kwargs): if not source._graph_important: source._graph_important = True log.debug(f"Re-queuing internal event {source} with parent {event}") - self.queue_event(source) + await self.emit_event(source) abort_result = False if callable(abort_if): diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index a11ba49fbb..f64af069d6 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -252,6 +252,15 @@ async def _prep(self): # run each module's .setup() method succeeded, hard_failed, soft_failed = await self.setup_modules() 
+ # hook modules get sewn together like human centipede + self.hook_modules = [m for m in self.modules.values() if m._hook] + for i, hook_module in enumerate(self.hook_modules[:-1]): + next_hook_module = self.hook_modules[i + 1] + self.debug( + f"Setting hook module {hook_module.name}.outgoing_event_queue to next hook module {next_hook_module.name}.incoming_event_queue" + ) + hook_module._outgoing_event_queue = next_hook_module.incoming_event_queue + # abort if there are no output modules num_output_modules = len([m for m in self.modules.values() if m._type == "output"]) if num_output_modules < 1: @@ -408,19 +417,20 @@ async def setup_modules(self, remove_failed=True): soft_failed = [] async for task in self.helpers.as_completed([m._setup() for m in self.modules.values()]): - module_name, status, msg = await task + module, status, msg = await task if status == True: - self.debug(f"Setup succeeded for {module_name} ({msg})") - succeeded.append(module_name) + self.debug(f"Setup succeeded for {module.name} ({msg})") + succeeded.append(module.name) elif status == False: - self.warning(f"Setup hard-failed for {module_name}: {msg}") - self.modules[module_name].set_error_state() - hard_failed.append(module_name) + self.warning(f"Setup hard-failed for {module.name}: {msg}") + self.modules[module.name].set_error_state() + hard_failed.append(module.name) else: - self.info(f"Setup soft-failed for {module_name}: {msg}") - soft_failed.append(module_name) - if not status and remove_failed: - self.modules.pop(module_name) + self.info(f"Setup soft-failed for {module.name}: {msg}") + soft_failed.append(module.name) + if (not status) and (module._hook or remove_failed): + # if a hook module fails setup, we always remove it + self.modules.pop(module.name) return succeeded, hard_failed, soft_failed @@ -513,15 +523,6 @@ async def load_modules(self): # sort modules by priority self.modules = OrderedDict(sorted(self.modules.items(), key=lambda x: getattr(x[-1], "priority", 3))) - # hook 
modules get sewn together like human centipede - self.hook_modules = [m for m in self.modules.values() if m._hook] - for i, hook_module in enumerate(self.hook_modules[:-1]): - next_hook_module = self.hook_modules[i + 1] - self.debug( - f"Setting hook module {hook_module.name}.outgoing_event_queue to next hook module {next_hook_module.name}.incoming_event_queue" - ) - hook_module._outgoing_event_queue = next_hook_module.incoming_event_queue - self._modules_loaded = True @property diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 8f429c32ea..9ee2c72e50 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -111,8 +111,7 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) scan.modules["dummy_module_nodupes"] = dummy_module_nodupes scan.modules["dummy_graph_output_module"] = dummy_graph_output_module scan.modules["dummy_graph_batch_output_module"] = dummy_graph_batch_output_module - if _dns_mock: - await scan.helpers.dns._mock_dns(_dns_mock) + await scan.helpers.dns._mock_dns(_dns_mock) if scan_callback is not None: scan_callback(scan) return ( @@ -177,9 +176,7 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test2.notrealzies"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) - # assert len(all_events) == 9 - for e in all_events: - log.critical(e) + assert len(all_events) == 9 assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal == True and e.scope_distance == 1]) assert 2 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == 
"test.notrealzies" and e.internal == True and e.scope_distance == 1]) @@ -327,6 +324,7 @@ def custom_setup(scan): "modules": {"speculate": {"ports": "8888"}}, "omit_event_types": ["HTTP_RESPONSE", "URL_UNVERIFIED"], }, + _dns_mock={}, ) assert len(events) == 6 @@ -567,6 +565,7 @@ def custom_setup(scan): modules=["httpx"], output_modules=["python"], _config={ + "dns_resolution": True, "scope_search_distance": 0, "scope_dns_search_distance": 2, "scope_report_distance": 0, @@ -743,7 +742,7 @@ def custom_setup(scan): assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == False and e.scope_distance == 1 and str(e.module) == "sslcert"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999"]) - assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal"]) + assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "bbottest.notreal"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) # sslcert with out-of-scope chain @@ -821,7 +820,7 @@ async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog): # the hostname is in-scope, but its IP is blacklisted, therefore we shouldn't see it assert not any([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://www-prod.test.notreal:8888/"]) - assert 'Omitting due to blacklisted DNS associations: URL_UNVERIFIED("http://www-prod.test.notreal:8888/"' in caplog.text + assert 'Not forwarding DNS_NAME("www-prod.test.notreal", module=excavate' in caplog.text and 'because it has a blacklisted DNS record' in caplog.text @pytest.mark.asyncio From 
dfd7b8858dc4888ce4fbee86ed0d140b598fda09 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 00:28:19 -0400 Subject: [PATCH 26/63] cloudcheck defragmentation --- bbot/core/helpers/cloud.py | 92 ---------------- bbot/core/helpers/helper.py | 8 +- bbot/modules/internal/cloud.py | 73 +++++++++++++ bbot/modules/internal/cloudcheck.py | 34 ------ .../internal/{dnsresolve.py => dns.py} | 13 ++- bbot/modules/internal/excavate.py | 8 -- bbot/modules/internal/speculate.py | 8 -- bbot/modules/templates/bucket.py | 2 +- bbot/test/test_step_1/test_cloud_helpers.py | 86 --------------- .../module_tests/test_module_cloud.py | 102 ++++++++++++++++++ 10 files changed, 188 insertions(+), 238 deletions(-) delete mode 100644 bbot/core/helpers/cloud.py create mode 100644 bbot/modules/internal/cloud.py delete mode 100644 bbot/modules/internal/cloudcheck.py rename bbot/modules/internal/{dnsresolve.py => dns.py} (96%) delete mode 100644 bbot/test/test_step_1/test_cloud_helpers.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_cloud.py diff --git a/bbot/core/helpers/cloud.py b/bbot/core/helpers/cloud.py deleted file mode 100644 index 7f1e19b695..0000000000 --- a/bbot/core/helpers/cloud.py +++ /dev/null @@ -1,92 +0,0 @@ -import asyncio -import logging - -from cloudcheck import cloud_providers - -log = logging.getLogger("bbot.helpers.cloud") - - -class CloudHelper: - def __init__(self, parent_helper): - self.parent_helper = parent_helper - self.providers = cloud_providers - self._updated = False - self._update_lock = asyncio.Lock() - - def excavate(self, event, s): - """ - Extract buckets, etc. 
from strings such as an HTTP responses - """ - for provider in self: - provider_name = provider.name.lower() - base_kwargs = {"source": event, "tags": [f"cloud-{provider_name}"], "_provider": provider_name} - for event_type, sigs in provider.signatures.items(): - found = set() - for sig in sigs: - for match in sig.findall(s): - kwargs = dict(base_kwargs) - kwargs["event_type"] = event_type - if not match in found: - found.add(match) - if event_type == "STORAGE_BUCKET": - yield self.emit_bucket(match, **kwargs) - else: - yield kwargs - - def speculate(self, event): - """ - Look for DNS_NAMEs that are buckets or other cloud resources - """ - for provider in self: - provider_name = provider.name.lower() - base_kwargs = dict( - source=event, tags=[f"{provider.provider_type}-{provider_name}"], _provider=provider_name - ) - if event.type.startswith("DNS_NAME"): - for event_type, sigs in provider.signatures.items(): - found = set() - for sig in sigs: - match = sig.match(event.data) - if match: - kwargs = dict(base_kwargs) - kwargs["event_type"] = event_type - if not event.data in found: - found.add(event.data) - if event_type == "STORAGE_BUCKET": - yield self.emit_bucket(match.groups(), **kwargs) - else: - yield kwargs - - def emit_bucket(self, match, **kwargs): - bucket_name, bucket_domain = match - kwargs["data"] = {"name": bucket_name, "url": f"https://{bucket_name}.{bucket_domain}"} - return kwargs - - async def tag_event(self, event): - """ - Tags an event according to cloud provider - """ - async with self._update_lock: - if not self._updated: - await self.providers.update() - self._updated = True - - if event.host: - for host in [event.host] + list(event.resolved_hosts): - provider_name, provider_type, source = self.providers.check(host) - if provider_name is not None: - provider = self.providers.providers[provider_name.lower()] - event.add_tag(f"{provider_type}-{provider_name.lower()}") - # if its host directly matches this cloud provider's domains - if not 
self.parent_helper.is_ip(host): - # tag as buckets, etc. - for event_type, sigs in provider.signatures.items(): - for sig in sigs: - if sig.match(host): - event.add_tag(f"{provider_type}-{event_type}") - - def __getitem__(self, item): - return self.providers.providers[item.lower()] - - def __iter__(self): - yield from self.providers diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index 8cc923fde1..d2bb4bb194 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -1,18 +1,17 @@ import os import logging from pathlib import Path +from cloudcheck import cloud_providers from . import misc from .dns import DNSHelper from .web import WebHelper from .diff import HttpCompare -from .cloud import CloudHelper from .wordcloud import WordCloud from .interactsh import Interactsh from ...scanner.target import Target from .depsinstaller import DepsInstaller - log = logging.getLogger("bbot.core.helpers") @@ -66,15 +65,14 @@ def __init__(self, preset): self.mkdir(self.tools_dir) self.mkdir(self.lib_dir) + self.cloud = cloud_providers + self.dns = DNSHelper(self) self.web = WebHelper(self) self.depsinstaller = DepsInstaller(self) self.word_cloud = WordCloud(self) self.dummy_modules = {} - # cloud helpers - self.cloud = CloudHelper(self) - def interactsh(self, *args, **kwargs): return Interactsh(self, *args, **kwargs) diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py new file mode 100644 index 0000000000..3ed5f7f50c --- /dev/null +++ b/bbot/modules/internal/cloud.py @@ -0,0 +1,73 @@ +from bbot.modules.base import HookModule + + +class cloud(HookModule): + watched_events = ["*"] + meta = {"description": "Tag events by cloud provider, identify cloud resources like storage buckets"} + scope_distance_modifier = 1 + _priority = 3 + + async def setup(self): + self.dummy_modules = {} + for provider_name in self.helpers.cloud.providers: + self.dummy_modules[provider_name] = 
self.scan._make_dummy_module(f"cloud_{provider_name}", _type="scan") + return True + + async def filter_event(self, event): + if (not event.host) or (event.type in ("IP_RANGE",)): + return False, "event does not have host attribute" + return True + + async def handle_event(self, event, kwargs): + # cloud tagging by hosts + hosts_to_check = set(str(s) for s in event.resolved_hosts) + hosts_to_check.add(str(event.host)) + for host in hosts_to_check: + provider, provider_type, subnet = self.helpers.cloudcheck(host) + if provider: + event.add_tag(f"{provider_type}-{provider}") + + found = set() + # look for cloud assets in hosts, http responses + # loop through each provider + for provider in self.helpers.cloud.providers.values(): + provider_name = provider.name.lower() + base_kwargs = dict( + source=event, tags=[f"{provider.provider_type}-{provider_name}"], _provider=provider_name + ) + # loop through the provider's regex signatures, if any + for event_type, sigs in provider.signatures.items(): + if event_type != "STORAGE_BUCKET": + raise ValueError(f'Unknown cloudcheck event type "{event_type}"') + base_kwargs["event_type"] = event_type + for sig in sigs: + matches = [] + if event.type == "HTTP_RESPONSE": + matches = sig.findall(event.data.get("body", "")) + elif event.type.startswith("DNS_NAME"): + for host in hosts_to_check: + matches.append(sig.match(host)) + for match in matches: + if not match: + continue + if not event.data in found: + found.add(event.data) + if event_type == "STORAGE_BUCKET": + _kwargs = dict(base_kwargs) + event_type_tag = f"cloud-{event_type}" + _kwargs["tags"].append(event_type_tag) + if event.type.startswith("DNS_NAME"): + event.add_tag(event_type_tag) + bucket_name, bucket_domain = match.groups() + _kwargs["data"] = { + "name": bucket_name, + "url": f"https://{bucket_name}.{bucket_domain}", + } + await self.emit_event(**_kwargs) + + async def emit_event(self, *args, **kwargs): + provider_name = kwargs.pop("_provider") + dummy_module = 
self.dummy_modules[provider_name] + event = dummy_module.make_event(*args, **kwargs) + if event: + await super().emit_event(event) diff --git a/bbot/modules/internal/cloudcheck.py b/bbot/modules/internal/cloudcheck.py deleted file mode 100644 index 85dca28aaa..0000000000 --- a/bbot/modules/internal/cloudcheck.py +++ /dev/null @@ -1,34 +0,0 @@ -from bbot.modules.base import HookModule - - -class cloudcheck(HookModule): - watched_events = ["*"] - scope_distance_modifier = 1 - _priority = 3 - - async def filter_event(self, event): - if (not event.host) or (event.type in ("IP_RANGE",)): - return False, "event does not have host attribute" - return True - - async def handle_event(self, event, kwargs): - - # skip if we're in tests - if self.helpers.in_tests: - return - - # cloud tagging by main host - await self.scan.helpers.cloud.tag_event(event) - - # cloud tagging by resolved hosts - to_check = set() - if event.type == "IP_ADDRESS": - to_check.add(event.host) - for rdtype, hosts in event.dns_children.items(): - if rdtype in ("A", "AAAA"): - for host in hosts: - to_check.add(host) - for host in to_check: - provider, provider_type, subnet = self.helpers.cloudcheck(host) - if provider: - event.add_tag(f"{provider_type}-{provider}") diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dns.py similarity index 96% rename from bbot/modules/internal/dnsresolve.py rename to bbot/modules/internal/dns.py index 1056d93433..173d9129a7 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dns.py @@ -8,14 +8,18 @@ from bbot.core.helpers.async_helpers import NamedLock -class dnsresolve(HookModule): +class DNS(HookModule): watched_events = ["*"] _priority = 1 _max_event_handlers = 25 scope_distance_modifier = None async def setup(self): - self.dns_resolution = self.scan.config.get("dns_resolution", False) + self.dns_resolution = True + # you can disable DNS resolution with either the "dns" or "dns_resolution" config options + for key in 
("dns", "dns_resolution"): + if self.scan.config.get(key, None) is False: + self.dns_resolution = False self.scope_search_distance = max(0, int(self.scan.config.get("scope_search_distance", 0))) self.scope_dns_search_distance = max(0, int(self.scan.config.get("scope_dns_search_distance", 1))) @@ -123,8 +127,9 @@ async def handle_event(self, event, kwargs): # set resolved_hosts attribute for rdtype, children in dns_children.items(): - for host in children: - event.resolved_hosts.add(host) + if rdtype in ("A", "AAAA", "CNAME"): + for host in children: + event.resolved_hosts.add(host) # set dns_children attribute event.dns_children = dns_children diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 0ec91ce677..1af70b051f 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -388,14 +388,6 @@ async def handle_event(self, event): body = self.helpers.recursive_decode(event.data.get("body", "")) - # Cloud extractors - for cloud_kwargs in self.helpers.cloud.excavate(event, body): - module = None - provider = cloud_kwargs.pop("_provider", "") - if provider: - module = self.scan._make_dummy_module(f"{provider}_cloud") - await self.emit_event(module=module, **cloud_kwargs) - await self.search( body, [ diff --git a/bbot/modules/internal/speculate.py b/bbot/modules/internal/speculate.py index e6d778a053..f983af6bf4 100644 --- a/bbot/modules/internal/speculate.py +++ b/bbot/modules/internal/speculate.py @@ -142,14 +142,6 @@ async def handle_event(self, event): quick=True, ) - # storage buckets etc. 
- for cloud_kwargs in self.helpers.cloud.speculate(event): - module = None - provider = cloud_kwargs.pop("_provider", "") - if provider: - module = self.scan._make_dummy_module(provider) - await self.emit_event(module=module, **cloud_kwargs) - # ORG_STUB from TLD, SOCIAL, AZURE_TENANT org_stubs = set() if event.type == "DNS_NAME" and event.scope_distance == 0: diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py index 095da6b70d..04597faa20 100644 --- a/bbot/modules/templates/bucket.py +++ b/bbot/modules/templates/bucket.py @@ -19,7 +19,7 @@ class bucket_template(BaseModule): async def setup(self): self.buckets_tried = set() - self.cloud_helper = self.helpers.cloud[self.cloud_helper_name] + self.cloud_helper = self.helpers.cloud.providers[self.cloud_helper_name] self.permutations = self.config.get("permutations", False) return True diff --git a/bbot/test/test_step_1/test_cloud_helpers.py b/bbot/test/test_step_1/test_cloud_helpers.py deleted file mode 100644 index adfc290ca1..0000000000 --- a/bbot/test/test_step_1/test_cloud_helpers.py +++ /dev/null @@ -1,86 +0,0 @@ -from ..bbot_fixtures import * # noqa: F401 - - -@pytest.mark.asyncio -async def test_cloud_helpers(bbot_scanner): - scan1 = bbot_scanner("127.0.0.1") - - provider_names = ("amazon", "google", "azure", "digitalocean", "oracle", "akamai", "cloudflare", "github") - for provider_name in provider_names: - assert provider_name in scan1.helpers.cloud.providers.providers - - for p in scan1.helpers.cloud.providers.providers.values(): - print(f"{p.name}: {p.domains} / {p.ranges}") - amazon_ranges = list(scan1.helpers.cloud["amazon"].ranges) - assert amazon_ranges - amazon_range = next(iter(amazon_ranges)) - amazon_address = amazon_range.broadcast_address - - ip_event = scan1.make_event(amazon_address, source=scan1.root_event) - aws_event1 = scan1.make_event("amazonaws.com", source=scan1.root_event) - aws_event2 = scan1.make_event("asdf.amazonaws.com", source=scan1.root_event) - 
aws_event3 = scan1.make_event("asdfamazonaws.com", source=scan1.root_event) - aws_event4 = scan1.make_event("test.asdf.aws", source=scan1.root_event) - - other_event1 = scan1.make_event("cname.evilcorp.com", source=scan1.root_event) - other_event2 = scan1.make_event("cname2.evilcorp.com", source=scan1.root_event) - other_event3 = scan1.make_event("cname3.evilcorp.com", source=scan1.root_event) - other_event2._resolved_hosts = {amazon_address} - other_event3._resolved_hosts = {"asdf.amazonaws.com"} - - for event in (ip_event, aws_event1, aws_event2, aws_event4, other_event2, other_event3): - await scan1.helpers.cloud.tag_event(event) - assert "cloud-amazon" in event.tags, f"{event} was not properly cloud-tagged" - - for event in (aws_event3, other_event1): - await scan1.helpers.cloud.tag_event(event) - assert "cloud-amazon" not in event.tags, f"{event} was improperly cloud-tagged" - assert not any( - t for t in event.tags if t.startswith("cloud-") or t.startswith("cdn-") - ), f"{event} was improperly cloud-tagged" - - google_event1 = scan1.make_event("asdf.googleapis.com", source=scan1.root_event) - google_event2 = scan1.make_event("asdf.google", source=scan1.root_event) - google_event3 = scan1.make_event("asdf.evilcorp.com", source=scan1.root_event) - google_event3._resolved_hosts = {"asdf.storage.googleapis.com"} - - for event in (google_event1, google_event2, google_event3): - await scan1.helpers.cloud.tag_event(event) - assert "cloud-google" in event.tags, f"{event} was not properly cloud-tagged" - assert "cloud-storage-bucket" in google_event3.tags - - -@pytest.mark.asyncio -async def test_cloud_helpers_excavate(bbot_scanner, bbot_httpserver): - url = bbot_httpserver.url_for("/test_cloud_helpers_excavate") - bbot_httpserver.expect_request(uri="/test_cloud_helpers_excavate").respond_with_data( - "" - ) - scan1 = bbot_scanner(url, modules=["httpx"], config={"excavate": True}) - events = [e async for e in scan1.async_start()] - assert 1 == len( - [ - e - for e in 
events - if e.type == "STORAGE_BUCKET" - and e.data["name"] == "asdf" - and "cloud-amazon" in e.tags - and "cloud-storage-bucket" in e.tags - ] - ) - - -@pytest.mark.asyncio -async def test_cloud_helpers_speculate(bbot_scanner): - scan1 = bbot_scanner("asdf.s3.amazonaws.com", config={"speculate": True}) - events = [e async for e in scan1.async_start()] - assert 1 == len( - [ - e - for e in events - if e.type == "STORAGE_BUCKET" - and e.data["name"] == "asdf" - and "cloud-amazon" in e.tags - and "cloud-storage-bucket" in e.tags - ] - ) diff --git a/bbot/test/test_step_2/module_tests/test_module_cloud.py b/bbot/test/test_step_2/module_tests/test_module_cloud.py new file mode 100644 index 0000000000..89942ed066 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_cloud.py @@ -0,0 +1,102 @@ +from .base import ModuleTestBase + + +class TestCloud(ModuleTestBase): + targets = ["www.azure.com"] + + async def setup_after_prep(self, module_test): + scan = module_test.scan + module = module_test.module + providers = scan.helpers.cloud.providers + # make sure we have all the providers + provider_names = ( + "amazon", + "google", + "azure", + "digitalocean", + "oracle", + "akamai", + "cloudflare", + "github", + "zoho", + "fastly", + ) + for provider_name in provider_names: + assert provider_name in providers + + amazon_ranges = list(providers["amazon"].ranges) + assert amazon_ranges + amazon_range = next(iter(amazon_ranges)) + amazon_address = amazon_range.broadcast_address + + ip_event = scan.make_event(amazon_address, source=scan.root_event) + aws_event1 = scan.make_event("amazonaws.com", source=scan.root_event) + aws_event2 = scan.make_event("asdf.amazonaws.com", source=scan.root_event) + aws_event3 = scan.make_event("asdfamazonaws.com", source=scan.root_event) + aws_event4 = scan.make_event("test.asdf.aws", source=scan.root_event) + + other_event1 = scan.make_event("cname.evilcorp.com", source=scan.root_event) + other_event2 = 
scan.make_event("cname2.evilcorp.com", source=scan.root_event) + other_event3 = scan.make_event("cname3.evilcorp.com", source=scan.root_event) + other_event2._resolved_hosts = {amazon_address} + other_event3._resolved_hosts = {"asdf.amazonaws.com"} + + for event in (ip_event, aws_event1, aws_event2, aws_event4, other_event2, other_event3): + await module.handle_event(event, {}) + assert "cloud-amazon" in event.tags, f"{event} was not properly cloud-tagged" + + for event in (aws_event3, other_event1): + await module.handle_event(event, {}) + assert "cloud-amazon" not in event.tags, f"{event} was improperly cloud-tagged" + assert not any( + t for t in event.tags if t.startswith("cloud-") or t.startswith("cdn-") + ), f"{event} was improperly cloud-tagged" + + google_event1 = scan.make_event("asdf.googleapis.com", source=scan.root_event) + google_event2 = scan.make_event("asdf.google", source=scan.root_event) + google_event3 = scan.make_event("asdf.evilcorp.com", source=scan.root_event) + google_event3._resolved_hosts = {"asdf.storage.googleapis.com"} + + for event in (google_event1, google_event2, google_event3): + await module.handle_event(event, {}) + assert "cloud-google" in event.tags, f"{event} was not properly cloud-tagged" + assert "cloud-storage-bucket" in google_event3.tags + + def check(self, events, module_test): + pass + + +# @pytest.mark.asyncio +# async def test_cloud_helpers_excavate(bbot_scanner, bbot_httpserver): +# url = bbot_httpserver.url_for("/test_cloud_helpers_excavate") +# bbot_httpserver.expect_request(uri="/test_cloud_helpers_excavate").respond_with_data( +# "" +# ) +# scan = bbot_scanner(url, modules=["httpx"], config={"excavate": True}) +# events = [e async for e in scan.async_start()] +# assert 1 == len( +# [ +# e +# for e in events +# if e.type == "STORAGE_BUCKET" +# and e.data["name"] == "asdf" +# and "cloud-amazon" in e.tags +# and "cloud-storage-bucket" in e.tags +# ] +# ) + + +# @pytest.mark.asyncio +# async def 
test_cloud_helpers_speculate(bbot_scanner): +# scan = bbot_scanner("asdf.s3.amazonaws.com", config={"speculate": True}) +# events = [e async for e in scan.async_start()] +# assert 1 == len( +# [ +# e +# for e in events +# if e.type == "STORAGE_BUCKET" +# and e.data["name"] == "asdf" +# and "cloud-amazon" in e.tags +# and "cloud-storage-bucket" in e.tags +# ] +# ) From 06a4d38e0710a5a5dc5bfce33b6b6aee77604549 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 10:53:37 -0400 Subject: [PATCH 27/63] dns module tests --- bbot/core/event/base.py | 20 ++++- bbot/modules/internal/cloud.py | 26 ++++--- bbot/test/test_step_1/test_events.py | 7 ++ .../module_tests/test_module_cloud.py | 73 ++++++++----------- .../module_tests/test_module_dns.py | 62 ++++++++++++++++ 5 files changed, 133 insertions(+), 55 deletions(-) create mode 100644 bbot/test/test_step_2/module_tests/test_module_dns.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 345e64115b..74b476ee6a 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -151,6 +151,7 @@ def __init__( self._port = None self.__words = None self._priority = None + self._host_original = None self._module_priority = None self._resolved_hosts = set() self.dns_children = dict() @@ -275,9 +276,24 @@ def host(self): E.g. for IP_ADDRESS, it could be an ipaddress.IPv4Address() or IPv6Address() object """ if self.__host is None: - self.__host = self._host() + self.host = self._host() return self.__host + @host.setter + def host(self, host): + if self._host_original is None: + self._host_original = host + self.__host = host + + @property + def host_original(self): + """ + Original host data, in case it was changed due to a wildcard DNS, etc. 
+ """ + if self._host_original is None: + return self.host + return self._host_original + @property def port(self): self.host @@ -793,7 +809,7 @@ def __init__(self, *args, **kwargs): ip = ipaddress.ip_address(self.data) self.add_tag(f"ipv{ip.version}") if ip.is_private: - self.add_tag("private") + self.add_tag("private-ip") self.dns_resolve_distance = getattr(self.source, "dns_resolve_distance", 0) def sanitize_data(self, data): diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index 3ed5f7f50c..bf69baa358 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -21,7 +21,7 @@ async def filter_event(self, event): async def handle_event(self, event, kwargs): # cloud tagging by hosts hosts_to_check = set(str(s) for s in event.resolved_hosts) - hosts_to_check.add(str(event.host)) + hosts_to_check.add(str(event.host_original)) for host in hosts_to_check: provider, provider_type, subnet = self.helpers.cloudcheck(host) if provider: @@ -46,19 +46,21 @@ async def handle_event(self, event, kwargs): matches = sig.findall(event.data.get("body", "")) elif event.type.startswith("DNS_NAME"): for host in hosts_to_check: - matches.append(sig.match(host)) + match = sig.match(host) + if match: + matches.append(match.groups()) for match in matches: - if not match: - continue - if not event.data in found: - found.add(event.data) + if not match in found: + found.add(match) + + _kwargs = dict(base_kwargs) + event_type_tag = f"cloud-{event_type}" + _kwargs["tags"].append(event_type_tag) + if event.type.startswith("DNS_NAME"): + event.add_tag(event_type_tag) + if event_type == "STORAGE_BUCKET": - _kwargs = dict(base_kwargs) - event_type_tag = f"cloud-{event_type}" - _kwargs["tags"].append(event_type_tag) - if event.type.startswith("DNS_NAME"): - event.add_tag(event_type_tag) - bucket_name, bucket_domain = match.groups() + bucket_name, bucket_domain = match _kwargs["data"] = { "name": bucket_name, "url": 
f"https://{bucket_name}.{bucket_domain}", diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 211ef4f369..5412346770 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -444,3 +444,10 @@ async def test_events(events, scan, helpers): event_5 = scan.make_event("127.0.0.5", source=event_4) assert event_5.get_sources() == [event_4, event_3, event_2, event_1, scan.root_event] assert event_5.get_sources(omit=True) == [event_4, event_2, event_1, scan.root_event] + + # test host backup + host_event = scan.make_event("asdf.evilcorp.com", "DNS_NAME", source=scan.root_event) + assert host_event.host_original == "asdf.evilcorp.com" + host_event.host = "_wildcard.evilcorp.com" + assert host_event.host == "_wildcard.evilcorp.com" + assert host_event.host_original == "asdf.evilcorp.com" diff --git a/bbot/test/test_step_2/module_tests/test_module_cloud.py b/bbot/test/test_step_2/module_tests/test_module_cloud.py index 89942ed066..1d4e59283a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_cloud.py +++ b/bbot/test/test_step_2/module_tests/test_module_cloud.py @@ -1,12 +1,19 @@ from .base import ModuleTestBase +from bbot.scanner import Scanner + class TestCloud(ModuleTestBase): - targets = ["www.azure.com"] + targets = ["http://127.0.0.1:8888", "asdf2.storage.googleapis.com"] + modules_overrides = ["httpx", "excavate", "cloud"] async def setup_after_prep(self, module_test): - scan = module_test.scan - module = module_test.module + + module_test.set_expect_requests({"uri": "/"}, {"response_data": ""}) + + scan = Scanner(config={"cloud": True}) + await scan._prep() + module = scan.modules["cloud"] providers = scan.helpers.cloud.providers # make sure we have all the providers provider_names = ( @@ -62,41 +69,25 @@ async def setup_after_prep(self, module_test): assert "cloud-google" in event.tags, f"{event} was not properly cloud-tagged" assert "cloud-storage-bucket" in google_event3.tags - 
def check(self, events, module_test): - pass - - -# @pytest.mark.asyncio -# async def test_cloud_helpers_excavate(bbot_scanner, bbot_httpserver): -# url = bbot_httpserver.url_for("/test_cloud_helpers_excavate") -# bbot_httpserver.expect_request(uri="/test_cloud_helpers_excavate").respond_with_data( -# "" -# ) -# scan = bbot_scanner(url, modules=["httpx"], config={"excavate": True}) -# events = [e async for e in scan.async_start()] -# assert 1 == len( -# [ -# e -# for e in events -# if e.type == "STORAGE_BUCKET" -# and e.data["name"] == "asdf" -# and "cloud-amazon" in e.tags -# and "cloud-storage-bucket" in e.tags -# ] -# ) - - -# @pytest.mark.asyncio -# async def test_cloud_helpers_speculate(bbot_scanner): -# scan = bbot_scanner("asdf.s3.amazonaws.com", config={"speculate": True}) -# events = [e async for e in scan.async_start()] -# assert 1 == len( -# [ -# e -# for e in events -# if e.type == "STORAGE_BUCKET" -# and e.data["name"] == "asdf" -# and "cloud-amazon" in e.tags -# and "cloud-storage-bucket" in e.tags -# ] -# ) + def check(self, module_test, events): + assert 2 == len([e for e in events if e.type == "STORAGE_BUCKET"]) + assert 1 == len( + [ + e + for e in events + if e.type == "STORAGE_BUCKET" + and e.data["name"] == "asdf" + and "cloud-amazon" in e.tags + and "cloud-storage-bucket" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "STORAGE_BUCKET" + and e.data["name"] == "asdf2" + and "cloud-google" in e.tags + and "cloud-storage-bucket" in e.tags + ] + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_dns.py b/bbot/test/test_step_2/module_tests/test_module_dns.py new file mode 100644 index 0000000000..0351ac44e7 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_dns.py @@ -0,0 +1,62 @@ +from .base import ModuleTestBase + + +class TestDNS(ModuleTestBase): + modules_overrides = ["dns"] + config_overrides = {"dns_resolution": True, "scope_report_distance": 1} + + async def setup_after_prep(self, 
module_test): + await module_test.mock_dns( + { + "blacklanternsecurity.com": { + "A": ["192.168.0.7"], + "AAAA": ["::1"], + "CNAME": ["www.blacklanternsecurity.com"], + }, + "www.blacklanternsecurity.com": {"A": ["192.168.0.8"]}, + } + ) + + def check(self, module_test, events): + self.log.critical(events) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "blacklanternsecurity.com" + and "a-record" in e.tags + and "aaaa-record" in e.tags + and "cname-record" in e.tags + and "private-ip" in e.tags + and e.scope_distance == 0 + and "192.168.0.7" in e.resolved_hosts + and "::1" in e.resolved_hosts + and "www.blacklanternsecurity.com" in e.resolved_hosts + and e.dns_children + == {"A": {"192.168.0.7"}, "AAAA": {"::1"}, "CNAME": {"www.blacklanternsecurity.com"}} + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "www.blacklanternsecurity.com" + and "a-record" in e.tags + and "private-ip" in e.tags + and e.scope_distance == 0 + and "192.168.0.8" in e.resolved_hosts + and e.dns_children == {"A": {"192.168.0.8"}} + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" + and e.data == "192.168.0.7" + and "private-ip" in e.tags + and e.scope_distance == 1 + ] + ) From ebb729580bacd6d421dd2e2636f4fae0160d3d03 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 10:56:26 -0400 Subject: [PATCH 28/63] fix dns tests --- bbot/test/test_step_1/test_dns.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index f5f528ac34..7c1fda190b 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -111,13 +111,13 @@ async def test_dns_resolution(bbot_scanner): await scan._prep() resolved_hosts_event1 = scan.make_event("one.one.one.one", "DNS_NAME", source=scan.root_event) resolved_hosts_event2 = 
scan.make_event("http://one.one.one.one/", "URL_UNVERIFIED", source=scan.root_event) - dnsresolve = scan.modules["dnsresolve"] + dnsresolve = scan.modules["dns"] assert hash(resolved_hosts_event1.host) not in dnsresolve._event_cache assert hash(resolved_hosts_event2.host) not in dnsresolve._event_cache - await dnsresolve.handle_event(resolved_hosts_event1) + await dnsresolve.handle_event(resolved_hosts_event1, {}) assert hash(resolved_hosts_event1.host) in dnsresolve._event_cache assert hash(resolved_hosts_event2.host) in dnsresolve._event_cache - await dnsresolve.handle_event(resolved_hosts_event2) + await dnsresolve.handle_event(resolved_hosts_event2, {}) assert "1.1.1.1" in resolved_hosts_event2.resolved_hosts assert "1.1.1.1" in resolved_hosts_event2.dns_children["A"] assert resolved_hosts_event1.resolved_hosts == resolved_hosts_event2.resolved_hosts @@ -180,10 +180,10 @@ async def test_wildcards(bbot_scanner): # event resolution await scan._prep() - dnsresolve = scan.modules["dnsresolve"] - await dnsresolve.handle_event(wildcard_event1) - await dnsresolve.handle_event(wildcard_event2) - await dnsresolve.handle_event(wildcard_event3) + dnsresolve = scan.modules["dns"] + await dnsresolve.handle_event(wildcard_event1, {}) + await dnsresolve.handle_event(wildcard_event2, {}) + await dnsresolve.handle_event(wildcard_event3, {}) assert "wildcard" in wildcard_event1.tags assert "a-wildcard" in wildcard_event1.tags assert "srv-wildcard" not in wildcard_event1.tags @@ -197,7 +197,7 @@ async def test_wildcards(bbot_scanner): from bbot.scanner import Scanner # test with full scan - scan2 = Scanner("asdfl.gashdgkjsadgsdf.github.io", config={"dnsresolve": True}) + scan2 = Scanner("asdfl.gashdgkjsadgsdf.github.io", config={"dns_resolution": True}) events = [e async for e in scan2.async_start()] assert len(events) == 2 assert 1 == len([e for e in events if e.type == "SCAN"]) @@ -228,7 +228,5 @@ async def test_wildcards(bbot_scanner): scan2 = 
Scanner("asdfl.gashdgkjsadgsdf.github.io", config={"dns_wildcard_ignore": ["github.io"]}) events = [e async for e in scan2.async_start()] assert len(events) == 2 - for e in events: - log.critical(e) - # assert 1 == len([e for e in events if e.type == "SCAN"]) - # assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "asdfl.gashdgkjsadgsdf.github.io" and all(t in e.tags for t in ('a-record', 'target', 'resolved', 'in-scope', 'subdomain', 'aaaa-record')) and not any(t in e.tags for t in ("wildcard", "a-wildcard", "aaaa-wildcard"))]) + assert 1 == len([e for e in events if e.type == "SCAN"]) + assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "asdfl.gashdgkjsadgsdf.github.io" and all(t in e.tags for t in ('a-record', 'target', 'resolved', 'in-scope', 'subdomain', 'aaaa-record')) and not any(t in e.tags for t in ("wildcard", "a-wildcard", "aaaa-wildcard"))]) From c4e1a19ef04a74e7e2b8e6902ba98c2073870aa7 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 10:56:36 -0400 Subject: [PATCH 29/63] blacked --- bbot/test/test_step_1/test_dns.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 7c1fda190b..0355656b80 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -229,4 +229,13 @@ async def test_wildcards(bbot_scanner): events = [e async for e in scan2.async_start()] assert len(events) == 2 assert 1 == len([e for e in events if e.type == "SCAN"]) - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "asdfl.gashdgkjsadgsdf.github.io" and all(t in e.tags for t in ('a-record', 'target', 'resolved', 'in-scope', 'subdomain', 'aaaa-record')) and not any(t in e.tags for t in ("wildcard", "a-wildcard", "aaaa-wildcard"))]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "asdfl.gashdgkjsadgsdf.github.io" + and all(t in e.tags 
for t in ("a-record", "target", "resolved", "in-scope", "subdomain", "aaaa-record")) + and not any(t in e.tags for t in ("wildcard", "a-wildcard", "aaaa-wildcard")) + ] + ) From db3961c262045004fe3655705fcc9d35125a2b7f Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 11:34:44 -0400 Subject: [PATCH 30/63] fix module tests --- bbot/test/test_step_1/test_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index 6ada6e64e5..db2a5316d4 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -98,17 +98,17 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): monkeypatch.setattr("sys.argv", ["bbot", "-y"]) result = await cli._main() assert result == True - assert "Loaded 3/3 internal modules (aggregate,excavate,speculate)" in caplog.text + assert "Loaded 5/5 internal modules (aggregate,cloud,dns,excavate,speculate)" in caplog.text caplog.clear() monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"]) result = await cli._main() assert result == True - assert "Loaded 1/1 internal modules (aggregate)" in caplog.text + assert "Loaded 3/3 internal modules (aggregate,cloud,dns)" in caplog.text caplog.clear() monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"]) result = await cli._main() assert result == True - assert "Loaded 2/2 internal modules (aggregate,excavate)" in caplog.text + assert "Loaded 4/4 internal modules (aggregate,cloud,dns,excavate)" in caplog.text # list modules caplog.clear() From b2679d2168b41a06c77fff9141ed10c13d1d8923 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 12:21:47 -0400 Subject: [PATCH 31/63] quick emit revisit --- bbot/scanner/manager.py | 8 ++++++++ bbot/scanner/stats.py | 3 +++ bbot/test/test_step_1/test_modules_basic.py | 6 +++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git 
a/bbot/scanner/manager.py b/bbot/scanner/manager.py index f390bb65b6..2fd2188612 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -84,6 +84,14 @@ async def handle_event(self, event, kwargs): # nerf event's priority if it's not in scope event.module_priority += event.scope_distance + async def forward_event(self, event, kwargs): + # if a module qualifies for "quick-emit", we skip all the intermediate modules like dns and cloud + # and forward it straight to the egress module + if event.quick_emit: + await self.scan.egress_module.queue_event(event, kwargs) + else: + await super().forward_event(event, kwargs) + @property def non_hook_modules(self): if self._non_hook_modules is None: diff --git a/bbot/scanner/stats.py b/bbot/scanner/stats.py index 0c0a4d2877..617703d8c7 100644 --- a/bbot/scanner/stats.py +++ b/bbot/scanner/stats.py @@ -23,6 +23,9 @@ def event_produced(self, event): module_stat.increment_produced(event) def event_consumed(self, event, module): + # skip ingress/egress modules, etc. 
+ if module.name.startswith("_"): + return module_stat = self.get(module) if module_stat is not None: module_stat.increment_consumed(event) diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 7f01428e66..5672e616e9 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -352,7 +352,7 @@ async def handle_event(self, event): "ORG_STUB": 1, } - assert set(scan.stats.module_stats) == {"host", "speculate", "python", "dummy", "TARGET"} + assert set(scan.stats.module_stats) == {'speculate', 'host', 'TARGET', 'python', 'dummy', 'cloud', 'dns'} target_stats = scan.stats.module_stats["TARGET"] assert target_stats.produced == {"SCAN": 1, "DNS_NAME": 1} @@ -363,8 +363,8 @@ async def handle_event(self, event): dummy_stats = scan.stats.module_stats["dummy"] assert dummy_stats.produced == {"FINDING": 1, "URL": 1} assert dummy_stats.produced_total == 2 - assert dummy_stats.consumed == {"DNS_NAME": 2, "OPEN_TCP_PORT": 1, "SCAN": 1, "URL": 1, "URL_UNVERIFIED": 1} - assert dummy_stats.consumed_total == 6 + assert dummy_stats.consumed == {"DNS_NAME": 2, "FINDING": 1, "OPEN_TCP_PORT": 1, "SCAN": 1, "URL": 1, "URL_UNVERIFIED": 1} + assert dummy_stats.consumed_total == 7 python_stats = scan.stats.module_stats["python"] assert python_stats.produced == {} From 5746f6e840499f45c57ecfc48888350c93329430 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 12:21:56 -0400 Subject: [PATCH 32/63] blacked --- bbot/test/test_step_1/test_modules_basic.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 5672e616e9..5fc187fe3a 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -352,7 +352,7 @@ async def handle_event(self, event): "ORG_STUB": 1, } - assert set(scan.stats.module_stats) == 
{'speculate', 'host', 'TARGET', 'python', 'dummy', 'cloud', 'dns'} + assert set(scan.stats.module_stats) == {"speculate", "host", "TARGET", "python", "dummy", "cloud", "dns"} target_stats = scan.stats.module_stats["TARGET"] assert target_stats.produced == {"SCAN": 1, "DNS_NAME": 1} @@ -363,7 +363,14 @@ async def handle_event(self, event): dummy_stats = scan.stats.module_stats["dummy"] assert dummy_stats.produced == {"FINDING": 1, "URL": 1} assert dummy_stats.produced_total == 2 - assert dummy_stats.consumed == {"DNS_NAME": 2, "FINDING": 1, "OPEN_TCP_PORT": 1, "SCAN": 1, "URL": 1, "URL_UNVERIFIED": 1} + assert dummy_stats.consumed == { + "DNS_NAME": 2, + "FINDING": 1, + "OPEN_TCP_PORT": 1, + "SCAN": 1, + "URL": 1, + "URL_UNVERIFIED": 1, + } assert dummy_stats.consumed_total == 7 python_stats = scan.stats.module_stats["python"] From 3b0efcff5e77c1c6d5e2068059179f0104ebdec0 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 12:37:45 -0400 Subject: [PATCH 33/63] fix preset tests --- bbot/test/test_step_1/test_presets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 864f1d0c4c..d84244e4fe 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -254,7 +254,7 @@ def test_preset_module_resolution(clean_default_config): # make sure we have the expected defaults assert not preset.scan_modules assert set(preset.output_modules) == {"python", "csv", "human", "json"} - assert set(preset.internal_modules) == {"aggregate", "excavate", "speculate"} + assert set(preset.internal_modules) == {"aggregate", "excavate", "speculate", "cloud", "dns"} assert preset.modules == set(preset.output_modules).union(set(preset.internal_modules)) # make sure dependency resolution works as expected From 30f325f6be57baeba8d4d38506c1f645a6991d66 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 13:02:57 -0400 Subject: 
[PATCH 34/63] fix affiliate tests --- bbot/modules/internal/dns.py | 8 +------- bbot/scanner/scanner.py | 6 +++++- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index 173d9129a7..12e5608d9e 100644 --- a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -48,9 +48,6 @@ async def handle_event(self, event, kwargs): event_host_hash = hash(str(event.host)) event_is_ip = self.helpers.is_ip(event.host) - # whether we've reached the max scope distance for dns - within_dns_search_distance = event.scope_distance < self._dns_search_distance - # only emit DNS children if we haven't seen this host before emit_children = self.dns_resolution and event_host_hash not in self._event_cache @@ -64,10 +61,7 @@ async def handle_event(self, event, kwargs): if event_is_ip: rdtypes_to_resolve = ["PTR"] else: - if self.dns_resolution and within_dns_search_distance: - rdtypes_to_resolve = all_rdtypes - else: - rdtypes_to_resolve = ["A", "AAAA", "CNAME"] + rdtypes_to_resolve = all_rdtypes # if missing from cache, do DNS resolution queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve] diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index f64af069d6..4a8ebf0f6a 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -550,7 +550,11 @@ def queued_event_types(self): queues.add(module.outgoing_event_queue) for q in queues: - for event, _ in q._queue: + for item in q._queue: + try: + event, _ = item + except ValueError: + event = item event_type = getattr(event, "type", None) if event_type is not None: try: From b29356b9d795eb439c08c6a53c5f7452b6e029dd Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 13:21:50 -0400 Subject: [PATCH 35/63] fix tests --- bbot/core/helpers/dns/dns.py | 4 ++++ bbot/defaults.yml | 6 +++++- bbot/modules/internal/dns.py | 2 ++ bbot/test/test_step_1/test_dns.py | 14 ++++++++++++++ 4 files changed, 25 insertions(+), 1 
deletion(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index cc0a1ff4a5..9764687bf1 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -62,6 +62,7 @@ def __init__(self, parent_helper): self.max_dns_resolve_distance = self.config.get("max_dns_resolve_distance", 5) # wildcard handling + self.wildcard_disable = self.config.get("dns_wildcard_disable", False) self.wildcard_ignore = self.config.get("dns_wildcard_ignore", None) if not self.wildcard_ignore: self.wildcard_ignore = [] @@ -136,6 +137,9 @@ async def is_wildcard_domain(self, domain, log_info=False): return await self.run_and_return("is_wildcard_domain", domain=domain, log_info=False) def _wildcard_prevalidation(self, host): + if self.wildcard_disable: + return False + host = clean_dns_record(host) # skip check if it's an IP or a plain hostname if is_ip(host) or not "." in host: diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 5b6323ae43..9eefb838b7 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -79,11 +79,14 @@ httpx_retries: 1 http_debug: false # Maximum number of HTTP redirects to follow http_max_redirects: 5 + # DNS query timeout dns_timeout: 5 # How many times to retry DNS queries dns_retries: 1 -# Disable BBOT's smart DNS wildcard handling for select domains +# Completely disable BBOT's DNS wildcard detection +dns_wildcard_disable: False +# Disable BBOT's DNS wildcard detection for select domains dns_wildcard_ignore: [] # How many sanity checks to make when verifying wildcard DNS # Increase this value if BBOT's wildcard detection isn't working @@ -95,6 +98,7 @@ dns_abort_threshold: 50 dns_filter_ptrs: true # Enable/disable debug messages for dns queries dns_debug: false + # Whether to verify SSL certificates ssl_verify: false # How many scan results to keep before cleaning up the older ones diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index 12e5608d9e..104ae0ad77 100644 --- 
a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -10,6 +10,8 @@ class DNS(HookModule): watched_events = ["*"] + options = {"max_event_handlers": 25} + options_desc = {"max_event_handlers": "Number of concurrent DNS workers"} _priority = 1 _max_event_handlers = 25 scope_distance_modifier = None diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 0355656b80..afc5c1967f 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -194,6 +194,20 @@ async def test_wildcards(bbot_scanner): assert wildcard_event2.data == "_wildcard.github.io" assert wildcard_event3.data == "github.io" + # dns resolve distance + event_distance_0 = scan.make_event("8.8.8.8", module=scan._make_dummy_module_dns("PTR"), source=scan.root_event) + assert event_distance_0.dns_resolve_distance == 0 + event_distance_1 = scan.make_event( + "evilcorp.com", module=scan._make_dummy_module_dns("A"), source=event_distance_0 + ) + assert event_distance_1.dns_resolve_distance == 1 + event_distance_2 = scan.make_event("1.2.3.4", module=scan._make_dummy_module_dns("PTR"), source=event_distance_1) + assert event_distance_2.dns_resolve_distance == 1 + event_distance_3 = scan.make_event( + "evilcorp.org", module=scan._make_dummy_module_dns("A"), source=event_distance_2 + ) + assert event_distance_3.dns_resolve_distance == 2 + from bbot.scanner import Scanner # test with full scan From 37d1a6eb22c8488e28308f40b26eee333f5cc4bc Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 14:04:30 -0400 Subject: [PATCH 36/63] fix bucket tests --- .../test_step_2/module_tests/test_module_bucket_amazon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py index 6d58dd36f5..37ce77c5a0 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +++ 
b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py @@ -34,7 +34,7 @@ def module_name(self): @property def modules_overrides(self): - return ["excavate", "speculate", "httpx", self.module_name] + return ["excavate", "speculate", "httpx", self.module_name, "cloud"] def url_setup(self): self.url_1 = f"https://{self.random_bucket_1}/" @@ -71,7 +71,7 @@ async def setup_after_prep(self, module_test): def check(self, module_test, events): # make sure buckets were excavated assert any( - e.type == "STORAGE_BUCKET" and str(e.module) == f"{self.provider}_cloud" for e in events + e.type == "STORAGE_BUCKET" and str(e.module) == f"cloud_{self.provider}" for e in events ), f'bucket not found for module "{self.module_name}"' # make sure open buckets were found if module_test.module.supports_open_check: From 56a1557a566e8ee76ce248b598a7bbf9b689e3a4 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 15 Apr 2024 15:48:05 -0400 Subject: [PATCH 37/63] fix csv tests --- bbot/modules/internal/dns.py | 5 ++++- bbot/test/test_step_2/module_tests/test_module_csv.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index 104ae0ad77..c55e7c5ebf 100644 --- a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -63,7 +63,10 @@ async def handle_event(self, event, kwargs): if event_is_ip: rdtypes_to_resolve = ["PTR"] else: - rdtypes_to_resolve = all_rdtypes + if self.dns_resolution: + rdtypes_to_resolve = all_rdtypes + else: + rdtypes_to_resolve = ("A", "AAAA", "CNAME") # if missing from cache, do DNS resolution queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve] diff --git a/bbot/test/test_step_2/module_tests/test_module_csv.py b/bbot/test/test_step_2/module_tests/test_module_csv.py index fc180d481b..0d6e326a96 100644 --- a/bbot/test/test_step_2/module_tests/test_module_csv.py +++ b/bbot/test/test_step_2/module_tests/test_module_csv.py @@ -2,6 +2,9 @@ class 
TestCSV(ModuleTestBase): + async def setup_after_prep(self, module_test): + await module_test.mock_dns({}) + def check(self, module_test, events): csv_file = module_test.scan.home / "output.csv" with open(csv_file) as f: From ffada9e211c48ce2b2db07a73e0ac5e66a5778d2 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 16 Apr 2024 16:45:14 -0400 Subject: [PATCH 38/63] rename HookModule --> InterceptModule --- bbot/modules/base.py | 12 +- bbot/modules/internal/cloud.py | 4 +- bbot/modules/internal/dns.py | 4 +- bbot/scanner/manager.py | 12 +- .../test_manager_scope_accuracy.py | 2 +- .../module_tests/test_module_dns.py | 1 - poetry.lock | 438 +++++------------- pyproject.toml | 1 - 8 files changed, 138 insertions(+), 336 deletions(-) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 0f8867201e..26332aca68 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -1402,7 +1402,17 @@ def critical(self, *args, trace=True, **kwargs): self.trace() -class HookModule(BaseModule): +class InterceptModule(BaseModule): + """ + An Intercept Module is a special type of high-priority module that gets early access to events. + + If you want your module to tag or modify an event before it's distributed to the scan, it should + probably be an intercept module. + + Examples of intercept modules include `dns` (for DNS resolution and wildcard detection) + and `cloud` (for detection and tagging of cloud assets). 
+ """ + accept_dupes = True suppress_dupes = False _hook = True diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index bf69baa358..6bfceacffb 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -1,7 +1,7 @@ -from bbot.modules.base import HookModule +from bbot.modules.base import InterceptModule -class cloud(HookModule): +class cloud(InterceptModule): watched_events = ["*"] meta = {"description": "Tag events by cloud provider, identify cloud resources like storage buckets"} scope_distance_modifier = 1 diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index c55e7c5ebf..c3db74891e 100644 --- a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -3,12 +3,12 @@ from cachetools import LRUCache from bbot.errors import ValidationError -from bbot.modules.base import HookModule +from bbot.modules.base import InterceptModule from bbot.core.helpers.dns.engine import all_rdtypes from bbot.core.helpers.async_helpers import NamedLock -class DNS(HookModule): +class DNS(InterceptModule): watched_events = ["*"] options = {"max_event_handlers": 25} options_desc = {"max_event_handlers": "Number of concurrent DNS workers"} diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 2fd2188612..dd01d5879d 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -2,14 +2,16 @@ import logging from contextlib import suppress -from bbot.modules.base import HookModule +from bbot.modules.base import InterceptModule log = logging.getLogger("bbot.scanner.manager") -class ScanIngress(HookModule): +class ScanIngress(InterceptModule): """ - This is always the first hook module in the chain, responsible for basic scope checks + This is always the first intercept module in the chain, responsible for basic scope checks + + It has its own incoming queue, but will also pull events from modules' outgoing queues """ watched_events = ["*"] @@ -137,9 +139,9 @@ def 
is_incoming_duplicate(self, event, add=False): return False -class ScanEgress(HookModule): +class ScanEgress(InterceptModule): """ - This is always the last hook module in the chain, responsible for executing and acting on the + This is always the last intercept module in the chain, responsible for executing and acting on the `abort_if` and `on_success_callback` functions. """ diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 9ee2c72e50..bc79a0029e 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -742,7 +742,7 @@ def custom_setup(scan): assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == False and e.scope_distance == 1 and str(e.module) == "sslcert"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999"]) - assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "bbottest.notreal"]) + assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) # sslcert with out-of-scope chain diff --git a/bbot/test/test_step_2/module_tests/test_module_dns.py b/bbot/test/test_step_2/module_tests/test_module_dns.py index 0351ac44e7..d74b623510 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dns.py +++ b/bbot/test/test_step_2/module_tests/test_module_dns.py @@ -18,7 +18,6 @@ async def setup_after_prep(self, module_test): ) def check(self, module_test, events): - self.log.critical(events) assert 1 
== len( [ e diff --git a/poetry.lock b/poetry.lock index ebe1b70131..00ee057158 100644 --- a/poetry.lock +++ b/poetry.lock @@ -584,21 +584,24 @@ wmi = ["wmi (>=1.5.1)"] [[package]] name = "docutils" -version = "0.20.1" +version = "0.21.1" description = "Docutils -- Python Documentation Utilities" optional = false -python-versions = "*" -files = [] +python-versions = ">=3.9" +files = [ + {file = "docutils-0.21.1-py3-none-any.whl", hash = "sha256:14c8d34a55b46c88f9f714adb29cefbdd69fb82f3fef825e59c5faab935390d8"}, + {file = "docutils-0.21.1.tar.gz", hash = "sha256:65249d8a5345bc95e0f40f280ba63c98eb24de35c6c8f5b662e3e8948adea83f"}, +] [[package]] name = "dunamai" -version = "1.19.2" +version = "1.20.0" description = "Dynamic version generation" optional = false python-versions = ">=3.5" files = [ - {file = "dunamai-1.19.2-py3-none-any.whl", hash = "sha256:bc126b17571a44d68ed826cec596e0f61dc01edca8b21486f70014936a5d44f2"}, - {file = "dunamai-1.19.2.tar.gz", hash = "sha256:3be4049890763e19b8df1d52960dbea60b3e263eb0c96144a677ae0633734d2e"}, + {file = "dunamai-1.20.0-py3-none-any.whl", hash = "sha256:a2185c227351a52a013c7d7a695d3f3cb6625c3eed14a5295adbbcc7e2f7f8d4"}, + {file = "dunamai-1.20.0.tar.gz", hash = "sha256:c3f1ee64a1e6cc9ebc98adafa944efaccd0db32482d2177e59c1ff6bdf23cd70"}, ] [package.dependencies] @@ -669,13 +672,13 @@ dev = ["flake8", "markdown", "twine", "wheel"] [[package]] name = "griffe" -version = "0.42.1" +version = "0.42.2" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
optional = false python-versions = ">=3.8" files = [ - {file = "griffe-0.42.1-py3-none-any.whl", hash = "sha256:7e805e35617601355edcac0d3511cedc1ed0cb1f7645e2d336ae4b05bbae7b3b"}, - {file = "griffe-0.42.1.tar.gz", hash = "sha256:57046131384043ed078692b85d86b76568a686266cc036b9b56b704466f803ce"}, + {file = "griffe-0.42.2-py3-none-any.whl", hash = "sha256:bf9a09d7e9dcc3aca6a2c7ab4f63368c19e882f58c816fbd159bea613daddde3"}, + {file = "griffe-0.42.2.tar.gz", hash = "sha256:d5547b7a1a0786f84042379a5da8bd97c11d0464d4de3d7510328ebce5fda772"}, ] [package.dependencies] @@ -692,32 +695,6 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] -[[package]] -name = "h2" -version = "4.1.0" -description = "HTTP/2 State-Machine based protocol implementation" -optional = false -python-versions = ">=3.6.1" -files = [ - {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, - {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, -] - -[package.dependencies] -hpack = ">=4.0,<5" -hyperframe = ">=6.0,<7" - -[[package]] -name = "hpack" -version = "4.0.0" -description = "Pure-Python HPACK header compression" -optional = false -python-versions = ">=3.6.1" -files = [ - {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, - {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, -] - [[package]] name = "httpcore" version = "1.0.5" @@ -763,17 +740,6 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] -[[package]] -name = "hyperframe" -version = "6.0.1" -description = "HTTP/2 framing layer for Python" -optional = false -python-versions = ">=3.6.1" -files = [ - {file = "hyperframe-6.0.1-py3-none-any.whl", hash = 
"sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, - {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, -] - [[package]] name = "identify" version = "2.5.35" @@ -861,17 +827,6 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] -[[package]] -name = "kiss-headers" -version = "2.4.3" -description = "Object-oriented HTTP and IMAP (structured) headers." -optional = false -python-versions = ">=3.7" -files = [ - {file = "kiss_headers-2.4.3-py3-none-any.whl", hash = "sha256:9d800b77532068e8748be9f96f30eaeb547cdc5345e4689ddf07b77071256239"}, - {file = "kiss_headers-2.4.3.tar.gz", hash = "sha256:70c689ce167ac83146f094ea916b40a3767d67c2e05a4cb95b0fd2e33bf243f1"}, -] - [[package]] name = "libsass" version = "0.23.0" @@ -1257,13 +1212,13 @@ mkdocs = ">=1.1" [[package]] name = "mkdocs-material" -version = "9.5.17" +version = "9.5.18" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.17-py3-none-any.whl", hash = "sha256:14a2a60119a785e70e765dd033e6211367aca9fc70230e577c1cf6a326949571"}, - {file = "mkdocs_material-9.5.17.tar.gz", hash = "sha256:06ae1275a72db1989cf6209de9e9ecdfbcfdbc24c58353877b2bb927dbe413e4"}, + {file = "mkdocs_material-9.5.18-py3-none-any.whl", hash = "sha256:1e0e27fc9fe239f9064318acf548771a4629d5fd5dfd45444fd80a953fe21eb4"}, + {file = "mkdocs_material-9.5.18.tar.gz", hash = "sha256:a43f470947053fa2405c33995f282d24992c752a50114f23f30da9d8d0c57e62"}, ] [package.dependencies] @@ -1349,30 +1304,6 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -[[package]] -name = "niquests" -version = "3.5.5" -description = "Niquests is a simple, yet elegant, HTTP library. It is a drop-in replacement for Requests, which is under feature freeze." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "niquests-3.5.5-py3-none-any.whl", hash = "sha256:bd134c7cbc414661840e73bebe0b766c16321558b3c444efb3f63aad9189e308"}, - {file = "niquests-3.5.5.tar.gz", hash = "sha256:5b52183cd4ee16f360de1e5b97bc266b933e8603320102d10d17f68a95e926ba"}, -] - -[package.dependencies] -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -kiss-headers = ">=2,<4" -urllib3-future = ">=2.7.900,<3" -wassima = ">=1.0.1,<2" - -[package.extras] -http3 = ["urllib3-future[qh3]"] -ocsp = ["cryptography (>=41.0.0,<43.0.0)"] -socks = ["urllib3-future[socks]"] -speedups = ["orjson (>=3,<4)", "urllib3-future[brotli,zstd]"] - [[package]] name = "nodeenv" version = "1.8.0" @@ -2152,147 +2083,106 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} -[[package]] -name = "qh3" -version = "0.15.1" -description = "An implementation of QUIC and HTTP/3" -optional = false -python-versions = ">=3.7" -files = [ - {file = "qh3-0.15.1-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:fe8f15e9fe5850508188ce38bdc89bda03d1a99ce3c2fbde6ee02d1d91edc557"}, - {file = "qh3-0.15.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:114d04dd51d3d9eca76ce804fea60ccb0fcbe84be08dcca70f32e30e5736aa00"}, - {file = "qh3-0.15.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:265240539a630cf458f3651f08bd07e4d46b2bf941a25e7f594321401701b30d"}, - {file = "qh3-0.15.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1074ee0e30fe825b60bd113767b56dcfe2f155e79f893d5180d4fd2adebaa1de"}, - {file = "qh3-0.15.1-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0afd9e7b90c90ff3e8c8e376020e3753936da0ce8db57ebb9fc95a50ba7e015d"}, - {file = "qh3-0.15.1-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:b7c4df89b03f90f67e372693c70f357dabc18908cb07dab21aa550c4f777017b"}, - {file = "qh3-0.15.1-cp37-abi3-musllinux_1_1_i686.whl", hash 
= "sha256:3923bb17dbdf91f060cb3b04cb8c2e3bf74d528a26f4c0e5365e311bade33b58"}, - {file = "qh3-0.15.1-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:87b61b59e3c692b70384430ccf634a228c54bb38ee6d974d76a7b086b356ecad"}, - {file = "qh3-0.15.1-cp37-abi3-win32.whl", hash = "sha256:3d02314850b0c8a5cd39015b9f5e5b21d54980702e3e80dcfc6aa7b983d7494a"}, - {file = "qh3-0.15.1-cp37-abi3-win_amd64.whl", hash = "sha256:1a0305b389cec13af879dee32c6584cff45a52865456e6645d84023ed8442d67"}, - {file = "qh3-0.15.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:8c000a32d2d3dadf252a55d71f676011f02c0e529024176d35e53122293d8a54"}, - {file = "qh3-0.15.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9562c2648a0e468cc3c97e77c658c0b9db288e29cfc79d52220e50ddcfac9fe9"}, - {file = "qh3-0.15.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71ab5d62606556c0ba2b1f3bf118dcb2d6f0236add792ffba42845a741abe498"}, - {file = "qh3-0.15.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:592bd246325090ffe8324761808713b1c99c7b7cae37ec4bd2841d0054729422"}, - {file = "qh3-0.15.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f68ac19161aaef887351f2e8df1972d91726ade69105b4ae1653ab0e70a18536"}, - {file = "qh3-0.15.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a842e65e57f8092f1fa185b1dc95556b1b695f06a4eb48dc9c07f018bd7a7ec"}, - {file = "qh3-0.15.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bca27698ad110fabda026f844f453b1ac1a1e2d86729846f5be0cdc9e7df419"}, - {file = "qh3-0.15.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c87a0613efbc3d353a76a917044270caf43198890ffe702b3cbe9b44065c45e"}, - {file = "qh3-0.15.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:77673a9b02e19c4f81e419efa2aa4040dec10f0a6158788196d8b5ef6aafb0d9"}, - {file = "qh3-0.15.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:087da39ebd5a8608e8df0892860b4fdcd4ff83753d7312cead490de6f1bce504"}, - {file = "qh3-0.15.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:561ba4d84e617ecc0d7506f532da2814e672a06cdcb903209616f00c5da74c14"}, - {file = "qh3-0.15.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec733c6a4da5ecf4448434562aba617ecfabbdef0a58df812684db7d03000070"}, - {file = "qh3-0.15.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74cc03a94e605820f3c5882e47388e8d2d8616d51db57a6e5120d9f2344dc04a"}, - {file = "qh3-0.15.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:939253ceaf5664c4e90f6317f0097839b6c8af627bb5905181f4fcbbc209c395"}, - {file = "qh3-0.15.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:969582d286be3e468ff5e53cdf2a5f47a942ea370f870a0276c4235a7ed13a71"}, - {file = "qh3-0.15.1.tar.gz", hash = "sha256:816c787f68855a28aa703be54956b21ff258e1650978a06b98a23bbf252cbe7e"}, -] - -[package.dependencies] -cryptography = ">=41.0.0,<43" - -[package.extras] -dev = ["coverage[toml] (>=7.2.2)"] - [[package]] name = "regex" -version = "2023.12.25" +version = "2024.4.16" description = "Alternative regular expression module, to replace re." 
optional = false python-versions = ">=3.7" files = [ - {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"}, - {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"}, - {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = 
"sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"}, - {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"}, - {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"}, - {file = 
"regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"}, - {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"}, - {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"}, - {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8a0ccf52bb37d1a700375a6b395bff5dd15c50acb745f7db30415bae3c2b0715"}, - {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c3c4a78615b7762740531c27cf46e2f388d8d727d0c0c739e72048beb26c8a9d"}, - {file = "regex-2023.12.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad83e7545b4ab69216cef4cc47e344d19622e28aabec61574b20257c65466d6a"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7a635871143661feccce3979e1727c4e094f2bdfd3ec4b90dfd4f16f571a87a"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d498eea3f581fbe1b34b59c697512a8baef88212f92e4c7830fcc1499f5b45a5"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43f7cd5754d02a56ae4ebb91b33461dc67be8e3e0153f593c509e21d219c5060"}, - {file = 
"regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51f4b32f793812714fd5307222a7f77e739b9bc566dc94a18126aba3b92b98a3"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba99d8077424501b9616b43a2d208095746fb1284fc5ba490139651f971d39d9"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4bfc2b16e3ba8850e0e262467275dd4d62f0d045e0e9eda2bc65078c0110a11f"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8c2c19dae8a3eb0ea45a8448356ed561be843b13cbc34b840922ddf565498c1c"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:60080bb3d8617d96f0fb7e19796384cc2467447ef1c491694850ebd3670bc457"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b77e27b79448e34c2c51c09836033056a0547aa360c45eeeb67803da7b0eedaf"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:518440c991f514331f4850a63560321f833979d145d7d81186dbe2f19e27ae3d"}, - {file = "regex-2023.12.25-cp312-cp312-win32.whl", hash = "sha256:e2610e9406d3b0073636a3a2e80db05a02f0c3169b5632022b4e81c0364bcda5"}, - {file = "regex-2023.12.25-cp312-cp312-win_amd64.whl", hash = "sha256:cc37b9aeebab425f11f27e5e9e6cf580be7206c6582a64467a14dda211abc232"}, - {file = "regex-2023.12.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:da695d75ac97cb1cd725adac136d25ca687da4536154cdc2815f576e4da11c69"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d126361607b33c4eb7b36debc173bf25d7805847346dd4d99b5499e1fef52bc7"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4719bb05094d7d8563a450cf8738d2e1061420f79cfcc1fa7f0a44744c4d8f73"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:5dd58946bce44b53b06d94aa95560d0b243eb2fe64227cba50017a8d8b3cd3e2"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22a86d9fff2009302c440b9d799ef2fe322416d2d58fc124b926aa89365ec482"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2aae8101919e8aa05ecfe6322b278f41ce2994c4a430303c4cd163fef746e04f"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e692296c4cc2873967771345a876bcfc1c547e8dd695c6b89342488b0ea55cd8"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:263ef5cc10979837f243950637fffb06e8daed7f1ac1e39d5910fd29929e489a"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d6f7e255e5fa94642a0724e35406e6cb7001c09d476ab5fce002f652b36d0c39"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:88ad44e220e22b63b0f8f81f007e8abbb92874d8ced66f32571ef8beb0643b2b"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3a17d3ede18f9cedcbe23d2daa8a2cd6f59fe2bf082c567e43083bba3fb00347"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d15b274f9e15b1a0b7a45d2ac86d1f634d983ca40d6b886721626c47a400bf39"}, - {file = "regex-2023.12.25-cp37-cp37m-win32.whl", hash = "sha256:ed19b3a05ae0c97dd8f75a5d8f21f7723a8c33bbc555da6bbe1f96c470139d3c"}, - {file = "regex-2023.12.25-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d1047952c0b8104a1d371f88f4ab62e6275567d4458c1e26e9627ad489b445"}, - {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b43523d7bc2abd757119dbfb38af91b5735eea45537ec6ec3a5ec3f9562a1c53"}, - {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:efb2d82f33b2212898f1659fb1c2e9ac30493ac41e4d53123da374c3b5541e64"}, - {file = 
"regex-2023.12.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7fca9205b59c1a3d5031f7e64ed627a1074730a51c2a80e97653e3e9fa0d415"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086dd15e9435b393ae06f96ab69ab2d333f5d65cbe65ca5a3ef0ec9564dfe770"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e81469f7d01efed9b53740aedd26085f20d49da65f9c1f41e822a33992cb1590"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34e4af5b27232f68042aa40a91c3b9bb4da0eeb31b7632e0091afc4310afe6cb"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9852b76ab558e45b20bf1893b59af64a28bd3820b0c2efc80e0a70a4a3ea51c1"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff100b203092af77d1a5a7abe085b3506b7eaaf9abf65b73b7d6905b6cb76988"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cc038b2d8b1470364b1888a98fd22d616fba2b6309c5b5f181ad4483e0017861"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:094ba386bb5c01e54e14434d4caabf6583334090865b23ef58e0424a6286d3dc"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5cd05d0f57846d8ba4b71d9c00f6f37d6b97d5e5ef8b3c3840426a475c8f70f4"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:9aa1a67bbf0f957bbe096375887b2505f5d8ae16bf04488e8b0f334c36e31360"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:98a2636994f943b871786c9e82bfe7883ecdaba2ef5df54e1450fa9869d1f756"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37f8e93a81fc5e5bd8db7e10e62dc64261bcd88f8d7e6640aaebe9bc180d9ce2"}, - {file = 
"regex-2023.12.25-cp38-cp38-win32.whl", hash = "sha256:d78bd484930c1da2b9679290a41cdb25cc127d783768a0369d6b449e72f88beb"}, - {file = "regex-2023.12.25-cp38-cp38-win_amd64.whl", hash = "sha256:b521dcecebc5b978b447f0f69b5b7f3840eac454862270406a39837ffae4e697"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"}, - {file = 
"regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"}, - {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"}, - {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"}, - {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"}, + {file = "regex-2024.4.16-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fb83cc090eac63c006871fd24db5e30a1f282faa46328572661c0a24a2323a08"}, + {file = "regex-2024.4.16-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8c91e1763696c0eb66340c4df98623c2d4e77d0746b8f8f2bee2c6883fd1fe18"}, + {file = "regex-2024.4.16-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:10188fe732dec829c7acca7422cdd1bf57d853c7199d5a9e96bb4d40db239c73"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:956b58d692f235cfbf5b4f3abd6d99bf102f161ccfe20d2fd0904f51c72c4c66"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a70b51f55fd954d1f194271695821dd62054d949efd6368d8be64edd37f55c86"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c02fcd2bf45162280613d2e4a1ca3ac558ff921ae4e308ecb307650d3a6ee51"}, + {file = 
"regex-2024.4.16-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ed75ea6892a56896d78f11006161eea52c45a14994794bcfa1654430984b22"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd727ad276bb91928879f3aa6396c9a1d34e5e180dce40578421a691eeb77f47"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7cbc5d9e8a1781e7be17da67b92580d6ce4dcef5819c1b1b89f49d9678cc278c"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:78fddb22b9ef810b63ef341c9fcf6455232d97cfe03938cbc29e2672c436670e"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:445ca8d3c5a01309633a0c9db57150312a181146315693273e35d936472df912"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:95399831a206211d6bc40224af1c635cb8790ddd5c7493e0bd03b85711076a53"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:7731728b6568fc286d86745f27f07266de49603a6fdc4d19c87e8c247be452af"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4facc913e10bdba42ec0aee76d029aedda628161a7ce4116b16680a0413f658a"}, + {file = "regex-2024.4.16-cp310-cp310-win32.whl", hash = "sha256:911742856ce98d879acbea33fcc03c1d8dc1106234c5e7d068932c945db209c0"}, + {file = "regex-2024.4.16-cp310-cp310-win_amd64.whl", hash = "sha256:e0a2df336d1135a0b3a67f3bbf78a75f69562c1199ed9935372b82215cddd6e2"}, + {file = "regex-2024.4.16-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1210365faba7c2150451eb78ec5687871c796b0f1fa701bfd2a4a25420482d26"}, + {file = "regex-2024.4.16-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9ab40412f8cd6f615bfedea40c8bf0407d41bf83b96f6fc9ff34976d6b7037fd"}, + {file = "regex-2024.4.16-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:fd80d1280d473500d8086d104962a82d77bfbf2b118053824b7be28cd5a79ea5"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bb966fdd9217e53abf824f437a5a2d643a38d4fd5fd0ca711b9da683d452969"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:20b7a68444f536365af42a75ccecb7ab41a896a04acf58432db9e206f4e525d6"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b74586dd0b039c62416034f811d7ee62810174bb70dffcca6439f5236249eb09"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c8290b44d8b0af4e77048646c10c6e3aa583c1ca67f3b5ffb6e06cf0c6f0f89"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2d80a6749724b37853ece57988b39c4e79d2b5fe2869a86e8aeae3bbeef9eb0"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3a1018e97aeb24e4f939afcd88211ace472ba566efc5bdf53fd8fd7f41fa7170"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8d015604ee6204e76569d2f44e5a210728fa917115bef0d102f4107e622b08d5"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:3d5ac5234fb5053850d79dd8eb1015cb0d7d9ed951fa37aa9e6249a19aa4f336"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:0a38d151e2cdd66d16dab550c22f9521ba79761423b87c01dae0a6e9add79c0d"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:159dc4e59a159cb8e4e8f8961eb1fa5d58f93cb1acd1701d8aff38d45e1a84a6"}, + {file = "regex-2024.4.16-cp311-cp311-win32.whl", hash = "sha256:ba2336d6548dee3117520545cfe44dc28a250aa091f8281d28804aa8d707d93d"}, + {file = "regex-2024.4.16-cp311-cp311-win_amd64.whl", hash = "sha256:8f83b6fd3dc3ba94d2b22717f9c8b8512354fd95221ac661784df2769ea9bba9"}, + {file = 
"regex-2024.4.16-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:80b696e8972b81edf0af2a259e1b2a4a661f818fae22e5fa4fa1a995fb4a40fd"}, + {file = "regex-2024.4.16-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d61ae114d2a2311f61d90c2ef1358518e8f05eafda76eaf9c772a077e0b465ec"}, + {file = "regex-2024.4.16-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8ba6745440b9a27336443b0c285d705ce73adb9ec90e2f2004c64d95ab5a7598"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295004b2dd37b0835ea5c14a33e00e8cfa3c4add4d587b77287825f3418d310"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4aba818dcc7263852aabb172ec27b71d2abca02a593b95fa79351b2774eb1d2b"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0800631e565c47520aaa04ae38b96abc5196fe8b4aa9bd864445bd2b5848a7a"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08dea89f859c3df48a440dbdcd7b7155bc675f2fa2ec8c521d02dc69e877db70"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eeaa0b5328b785abc344acc6241cffde50dc394a0644a968add75fcefe15b9d4"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4e819a806420bc010489f4e741b3036071aba209f2e0989d4750b08b12a9343f"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:c2d0e7cbb6341e830adcbfa2479fdeebbfbb328f11edd6b5675674e7a1e37730"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:91797b98f5e34b6a49f54be33f72e2fb658018ae532be2f79f7c63b4ae225145"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:d2da13568eff02b30fd54fccd1e042a70fe920d816616fda4bf54ec705668d81"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:370c68dc5570b394cbaadff50e64d705f64debed30573e5c313c360689b6aadc"}, + {file = "regex-2024.4.16-cp312-cp312-win32.whl", hash = "sha256:904c883cf10a975b02ab3478bce652f0f5346a2c28d0a8521d97bb23c323cc8b"}, + {file = "regex-2024.4.16-cp312-cp312-win_amd64.whl", hash = "sha256:785c071c982dce54d44ea0b79cd6dfafddeccdd98cfa5f7b86ef69b381b457d9"}, + {file = "regex-2024.4.16-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2f142b45c6fed48166faeb4303b4b58c9fcd827da63f4cf0a123c3480ae11fb"}, + {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e87ab229332ceb127a165612d839ab87795972102cb9830e5f12b8c9a5c1b508"}, + {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81500ed5af2090b4a9157a59dbc89873a25c33db1bb9a8cf123837dcc9765047"}, + {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b340cccad138ecb363324aa26893963dcabb02bb25e440ebdf42e30963f1a4e0"}, + {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c72608e70f053643437bd2be0608f7f1c46d4022e4104d76826f0839199347a"}, + {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a01fe2305e6232ef3e8f40bfc0f0f3a04def9aab514910fa4203bafbc0bb4682"}, + {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:03576e3a423d19dda13e55598f0fd507b5d660d42c51b02df4e0d97824fdcae3"}, + {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:549c3584993772e25f02d0656ac48abdda73169fe347263948cf2b1cead622f3"}, + {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:34422d5a69a60b7e9a07a690094e824b66f5ddc662a5fc600d65b7c174a05f04"}, + {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = 
"sha256:5f580c651a72b75c39e311343fe6875d6f58cf51c471a97f15a938d9fe4e0d37"}, + {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3399dd8a7495bbb2bacd59b84840eef9057826c664472e86c91d675d007137f5"}, + {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8d1f86f3f4e2388aa3310b50694ac44daefbd1681def26b4519bd050a398dc5a"}, + {file = "regex-2024.4.16-cp37-cp37m-win32.whl", hash = "sha256:dd5acc0a7d38fdc7a3a6fd3ad14c880819008ecb3379626e56b163165162cc46"}, + {file = "regex-2024.4.16-cp37-cp37m-win_amd64.whl", hash = "sha256:ba8122e3bb94ecda29a8de4cf889f600171424ea586847aa92c334772d200331"}, + {file = "regex-2024.4.16-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:743deffdf3b3481da32e8a96887e2aa945ec6685af1cfe2bcc292638c9ba2f48"}, + {file = "regex-2024.4.16-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7571f19f4a3fd00af9341c7801d1ad1967fc9c3f5e62402683047e7166b9f2b4"}, + {file = "regex-2024.4.16-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:df79012ebf6f4efb8d307b1328226aef24ca446b3ff8d0e30202d7ebcb977a8c"}, + {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e757d475953269fbf4b441207bb7dbdd1c43180711b6208e129b637792ac0b93"}, + {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4313ab9bf6a81206c8ac28fdfcddc0435299dc88cad12cc6305fd0e78b81f9e4"}, + {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d83c2bc678453646f1a18f8db1e927a2d3f4935031b9ad8a76e56760461105dd"}, + {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9df1bfef97db938469ef0a7354b2d591a2d438bc497b2c489471bec0e6baf7c4"}, + {file = "regex-2024.4.16-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62120ed0de69b3649cc68e2965376048793f466c5a6c4370fb27c16c1beac22d"}, + {file = 
"regex-2024.4.16-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c2ef6f7990b6e8758fe48ad08f7e2f66c8f11dc66e24093304b87cae9037bb4a"}, + {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8fc6976a3395fe4d1fbeb984adaa8ec652a1e12f36b56ec8c236e5117b585427"}, + {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:03e68f44340528111067cecf12721c3df4811c67268b897fbe695c95f860ac42"}, + {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ec7e0043b91115f427998febaa2beb82c82df708168b35ece3accb610b91fac1"}, + {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c21fc21a4c7480479d12fd8e679b699f744f76bb05f53a1d14182b31f55aac76"}, + {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:12f6a3f2f58bb7344751919a1876ee1b976fe08b9ffccb4bbea66f26af6017b9"}, + {file = "regex-2024.4.16-cp38-cp38-win32.whl", hash = "sha256:479595a4fbe9ed8f8f72c59717e8cf222da2e4c07b6ae5b65411e6302af9708e"}, + {file = "regex-2024.4.16-cp38-cp38-win_amd64.whl", hash = "sha256:0534b034fba6101611968fae8e856c1698da97ce2efb5c2b895fc8b9e23a5834"}, + {file = "regex-2024.4.16-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a7ccdd1c4a3472a7533b0a7aa9ee34c9a2bef859ba86deec07aff2ad7e0c3b94"}, + {file = "regex-2024.4.16-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f2f017c5be19984fbbf55f8af6caba25e62c71293213f044da3ada7091a4455"}, + {file = "regex-2024.4.16-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:803b8905b52de78b173d3c1e83df0efb929621e7b7c5766c0843704d5332682f"}, + {file = "regex-2024.4.16-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:684008ec44ad275832a5a152f6e764bbe1914bea10968017b6feaecdad5736e0"}, + {file = "regex-2024.4.16-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65436dce9fdc0aeeb0a0effe0839cb3d6a05f45aa45a4d9f9c60989beca78b9c"}, + {file = 
"regex-2024.4.16-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea355eb43b11764cf799dda62c658c4d2fdb16af41f59bb1ccfec517b60bcb07"}, + {file = "regex-2024.4.16-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98c1165f3809ce7774f05cb74e5408cd3aa93ee8573ae959a97a53db3ca3180d"}, + {file = "regex-2024.4.16-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cccc79a9be9b64c881f18305a7c715ba199e471a3973faeb7ba84172abb3f317"}, + {file = "regex-2024.4.16-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:00169caa125f35d1bca6045d65a662af0202704489fada95346cfa092ec23f39"}, + {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6cc38067209354e16c5609b66285af17a2863a47585bcf75285cab33d4c3b8df"}, + {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:23cff1b267038501b179ccbbd74a821ac4a7192a1852d1d558e562b507d46013"}, + {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:b9d320b3bf82a39f248769fc7f188e00f93526cc0fe739cfa197868633d44701"}, + {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:89ec7f2c08937421bbbb8b48c54096fa4f88347946d4747021ad85f1b3021b3c"}, + {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4918fd5f8b43aa7ec031e0fef1ee02deb80b6afd49c85f0790be1dc4ce34cb50"}, + {file = "regex-2024.4.16-cp39-cp39-win32.whl", hash = "sha256:684e52023aec43bdf0250e843e1fdd6febbe831bd9d52da72333fa201aaa2335"}, + {file = "regex-2024.4.16-cp39-cp39-win_amd64.whl", hash = "sha256:e697e1c0238133589e00c244a8b676bc2cfc3ab4961318d902040d099fec7483"}, + {file = "regex-2024.4.16.tar.gz", hash = "sha256:fa454d26f2e87ad661c4f0c5a5fe4cf6aab1e307d1b94f16ffdfcb089ba685c0"}, ] [[package]] @@ -2348,18 +2238,18 @@ test = ["commentjson", "packaging", "pytest"] [[package]] name = "setuptools" -version = "69.2.0" +version 
= "69.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, - {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, + {file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"}, + {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip 
(>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] @@ -2511,28 +2401,6 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] -[[package]] -name = "urllib3-future" -version = "2.7.903" -description = "urllib3.future is a powerful HTTP 1.1, 2, and 3 client with both sync and async interfaces" -optional = false -python-versions = ">=3.7" -files = [ - {file = "urllib3_future-2.7.903-py3-none-any.whl", hash = "sha256:04bebce1291c9be9db2b03bb016db56d1f7e3dbe425c7250129552a8ceaf6827"}, - {file = "urllib3_future-2.7.903.tar.gz", hash = "sha256:99e1265c8bb2478d86b8a6c0de991ac275ad58d5e43ac11d980a0dd1cc183804"}, -] - -[package.dependencies] -h11 = ">=0.11.0,<1.0.0" -h2 = ">=4.0.0,<5.0.0" -qh3 = {version = ">=0.14.0,<1.0.0", markers = "(platform_system == \"Darwin\" or platform_system == \"Windows\" or platform_system == \"Linux\") and (platform_python_implementation == \"CPython\" or (platform_python_implementation == \"PyPy\" and python_version >= \"3.8\" and python_version < \"3.11\"))"} - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -qh3 = ["qh3 (>=0.14.0,<1.0.0)"] -socks = ["python-socks (>=2.0,<3.0)"] -zstd = ["zstandard (>=0.18.0)"] - [[package]] name = "virtualenv" version = "20.25.1" @@ -2553,82 +2421,6 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", 
"coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] -[[package]] -name = "wassima" -version = "1.1.0" -description = "Access your OS root certificates with the atmost ease" -optional = false -python-versions = ">=3.7" -files = [ - {file = "wassima-1.1.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6b67781f7b9483a5dbcb1cabe588ab316f06f7c97a9d60b6981681f790aa16a1"}, - {file = "wassima-1.1.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:fb331ab3ff4222ced413a9830c1e9e6a834e7257bfee0043d2f56166ef4aa1cb"}, - {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8904e865a2ac81d8160878e7788bc5ee6f4ca6948cf5c5198a83050d68537024"}, - {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c414ee94cd1986d7ea3700a6d80efc9ae9b194c37d77396bcfaf927b0d5a620e"}, - {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7d5d2d1f4f35808a58c8fe7777db14526bd53f77a34b373f070912b2c23f2c3b"}, - {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ee1b84222c65f0e2b8ecb6362cc721df1953a0a59e13efc7a4055592fd897f8"}, - {file = "wassima-1.1.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56450dee854ce494003f2be92f2eddb2531c02a456a7866dd32af467672c3b7b"}, - {file = "wassima-1.1.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28897780714f49331fd3e76531ea248df637bbf3e7bf4be175381a92d596c460"}, - {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7528cbfe710af7f9e92cd52296efd7a788466b7cc7fe575b196f604a6ba2281c"}, - {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_armv7l.whl", hash = 
"sha256:4c3325dff14e796d346e81f90067d054714b99a3d86b6d0a5a76d85bafd2b654"}, - {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_i686.whl", hash = "sha256:e6609ca3d620792c1dc137efff4c189adee0f13f266ae14515d7de2952159b95"}, - {file = "wassima-1.1.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:60a695e750f9b4fc3cc79cbbbb5e507b8f9715e4135906bb1822126fad1ce5a2"}, - {file = "wassima-1.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:fca891820f7c679d3adc2443d6f85d6201db4badc6b17927d70fa498168d1aea"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:66efd9ee997bfb2311ade7a09f3174d6450a8695ab6b323840539c5826a276c6"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:a8550eb00a31eac76a5b5fab3ca2e87cc8d91781191dffa3e133ebf574305321"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4df677518d7779fc8a522132c4d96391e0a262dd12bb54ec3937bc8b58f6d3d5"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:29d4f6d006ce96c2087c42414ad72ef71bc25bd20ac457dfe89ab2448b0d08e4"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e73264427c6e3f93c7e1b0529960a342a6b4c9c16d17785872a845ee2b0d28f5"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fdbc87751649a23377f279873aae902a38ce44162170edd6b6893d47a259a78"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce49ac61ca651f49c2664003215e259a017d5a1116d669ef331c4930214f53b0"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9522b4905fc75eeaac8518c54362e87d89e83bbefebdb1898a0ef025006e8241"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:1b2e419d3075e425ecdcefd486ccd56697dc209e6e2120746477a995392b9402"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1ebe5b0feead8b0457b885f181156574bf9ca88df6fe4cef6ad6b364f02d9e98"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_i686.whl", hash = "sha256:6947c5e2d23383f00199b2cf638d7a090dfe5949bad113387e020b83f2423815"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4420be43f5b4e2b7721080130de565a582299d0d02771c9a7db55366d9c93da5"}, - {file = "wassima-1.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a9fb48413d80d41aa6531a2271516f63c8a1debac016cf8fad6a2fd30aa4486d"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9a4593db51fc02529950158f1217e08c9d62e1299e20a19858f07f80c6d09197"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-macosx_11_0_arm64.whl", hash = "sha256:127aecd895501e79b76114109dba0e4bcf6adcf47169f75d44ecd08b4d983ae7"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7d36eb4e92a348f58182f7f69b0e2fc680ac6605377f5201bac40b303727493"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0e29ace26e79b923d5b0f04c38dff44dc47b9c48684894d8f20841c6ee79760"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ab77a0390ba74b7a011918ae5c187e2936cd46f4abffd37c5ff228dbdc4b5e89"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8b18821d94eabef23946e21566e7ae7c009ef3a89fe1bc0204e791ba5fdb8ed5"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ef95ad25c04629939d6a6015a798c8b0435cebc0c53cc4b1dabb2a89214a4d8"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:ebef47ce05288db306d4f56937f96c48da07afaec014a6ed46ecb17176f874bf"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f0833555f8a334cf1c824f24b07c6b01b13128825d16f7802c4c70d14d2dbe09"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:31afbbe4ea11ea9f92b152e4a61495614bfc0ae3d7c3215a24928144bab79f99"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_i686.whl", hash = "sha256:a759b84855b70922ee31234264ea2f4a058943a38270a18f00fd597f365b4bcb"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa01044ab3ca1f55e2d0d08128a97a68e9022795587627ee9edb3471c72e5df4"}, - {file = "wassima-1.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:08d6cf46441d73335b84c15c4f891adcb59f70701a13ecdee82aead5e0a9b134"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:23b56e0560bd2f35fceab001099bb890d8238fed64e7a0677cacbd1c4d870183"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac1866ee965263e3e024049044e8a5ce905fea2d40e005be03dcd89265fc1e6c"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4285c88f5cb4322318e3c3666d79b923f5317451efc2701011d960774d812675"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1f643f02b0824e789a7c98b9089dfd772a74ceec1a611cf420799f986cadb6bc"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9a7b91256ca429f99beff72dd89b0d5bd6ee1ca8f047138785c5b943eebfb1"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4224cf40a81840618a22164d4002fe5bb9b83cde957ec16e8913996809e705dd"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1006c7510b8495559fc2f1f27a7e49205140eb6b91a91f2c71cd91c2588522ae"}, - 
{file = "wassima-1.1.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5abfa548d3c7acbf87899fc4af99c5a1fe929cf8cc7a7fd65a825dd88fa37b10"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c5854745eb0fd9243ebbaad46dc1f6f5193dd3f13f12dd19da95877ee2a8d62c"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:9def0580135d80a64aac4761e008d0d82fad5feb9c5028ba9427393144e4a535"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_i686.whl", hash = "sha256:450501472645fe5ea65f1848466ce5a0f2800ed5e13288fa4c210728e2883d24"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9764c226493e4a9b960156c3657ef7cece430ab8bad0035ebffb0eeb488633cb"}, - {file = "wassima-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:83792a234431f7fbd06f3370e968b99df430ab3bacdb9ea3318247d55dee3b6c"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:aee6fcfa43ce63691ec30943681e9432ff6cecbd976526c7ec0e5f2aaf85866a"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:31e69da1f3cf1ce4f24dbc4590101d68fcb3e1f715566fe30b6691429e9c1b10"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b574a646498b191bc8974524458d85bc55335992dc8ea7cddcb09ec58c01d4"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3112316434fd3ed3cfb1eac4998f54ed46d07a36172d18d543c0815a98e0d51"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:18e6f92114f878ea26fea7a10af255a6aadfddb1600f20fdfe96d65598e62501"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09d54c87ce23ec2332f2acefc030ae3f4262b94cb1f0c613c8d2e30c297d12d7"}, - {file = 
"wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9d953b261b7b64072fbed7b4bf4441f7910d8247387f29cc82f8c314f7acf39"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3c8ef5a3d129997147f5475c276bc79da14ac59a8f614f07634e2aac5d9b2f94"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e9a6da09d6a03c0c8ec3f5c6b7fa5061f051d67a0e0f0ec1518d2bd76efb7535"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:39d65b2beb0eb17a92cdf859d8e9146a15f8d7f35ab95602780a3ac078069e7e"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_i686.whl", hash = "sha256:e5ed0411e3a14e9352ff83e47952df03b7c8915f9fd4c9fb0888a80ac2759dcf"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:8f5869858975755d90e5505d3a7e2ac687cd09a348bc48137fd5b270969bd7a0"}, - {file = "wassima-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:39742c4884b7d1b3314064895e10345b96c7cab0a8f622e65f7beea89c0de4d2"}, - {file = "wassima-1.1.0-py3-none-any.whl", hash = "sha256:d250b77c1964c03f010a271fdd0cad3e14af250fb15cc3a729f23ee1e5922f69"}, - {file = "wassima-1.1.0.tar.gz", hash = "sha256:0ae03025ec07c0491e2d1a499d404eb66180c226f403451042190294f6ec7f06"}, -] - [[package]] name = "watchdog" version = "4.0.0" @@ -2821,4 +2613,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "38517d808d6bc20a9e2c8597b4024707537f2a92d1f75c67a5ed3477c139418b" +content-hash = "1532c2dc5846395a46766fead9f3c29223369ba11025b04e4eebec508fe0d8da" diff --git a/pyproject.toml b/pyproject.toml index cb8766c381..53dfe2131c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,6 @@ cachetools = "^5.3.2" socksio = "^1.0.0" jinja2 = "^3.1.3" pyzmq = "^25.1.2" -niquests = "^3.5.5" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 
d90b2b16b24fa6cfaa0f43dbc37487f0cdad9a49 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 16 Apr 2024 17:45:09 -0400 Subject: [PATCH 39/63] let there be shared module dependencies --- bbot/core/helpers/depsinstaller/installer.py | 14 +++ bbot/core/modules.py | 30 ++++++- bbot/core/shared_deps.py | 95 ++++++++++++++++++++ bbot/defaults.yml | 5 ++ bbot/modules/deadly/ffuf.py | 14 +-- bbot/modules/ffuf_shortnames.py | 12 +-- bbot/modules/gowitness.py | 39 +------- bbot/modules/massdns.py | 36 +------- 8 files changed, 144 insertions(+), 101 deletions(-) create mode 100644 bbot/core/shared_deps.py diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py index 8b9d2ae2bc..c386b6c3b1 100644 --- a/bbot/core/helpers/depsinstaller/installer.py +++ b/bbot/core/helpers/depsinstaller/installer.py @@ -148,6 +148,20 @@ async def install_module(self, module): if deps_pip: success &= await self.pip_install(deps_pip, constraints=deps_pip_constraints) + # shared/common + deps_common = preloaded["deps"]["common"] + if deps_common: + for dep_common in deps_common: + if self.setup_status.get(dep_common, False) == True: + log.critical( + f'Skipping installation of dependency "{dep_common}" for module "{module}" since it is already installed' + ) + continue + ansible_tasks = self.preset.module_loader._shared_deps[dep_common] + result = self.tasks(module, ansible_tasks) + self.setup_status[dep_common] = result + success &= result + return success async def pip_install(self, packages, constraints=None): diff --git a/bbot/core/modules.py b/bbot/core/modules.py index 2f3ce445c9..508836b2a4 100644 --- a/bbot/core/modules.py +++ b/bbot/core/modules.py @@ -13,9 +13,11 @@ from contextlib import suppress from bbot.core import CORE +from bbot.errors import BBOTError from bbot.logger import log_to_stderr from .flags import flag_descriptions +from .shared_deps import SHARED_DEPS from .helpers.misc import list_files, sha1, search_dict_by_key, 
search_format_dict, make_table, os_platform, mkdir @@ -43,6 +45,8 @@ class ModuleLoader: def __init__(self): self.core = CORE + self._shared_deps = dict(SHARED_DEPS) + self.__preloaded = {} self._modules = {} self._configs = {} @@ -250,6 +254,7 @@ def configs(self, type=None): def find_and_replace(self, **kwargs): self.__preloaded = search_format_dict(self.__preloaded, **kwargs) + self._shared_deps = search_format_dict(self._shared_deps, **kwargs) def check_type(self, module, type): return self._preloaded[module]["type"] == type @@ -312,6 +317,7 @@ def preload_module(self, module_file): deps_pip_constraints = [] deps_shell = [] deps_apt = [] + deps_common = [] ansible_tasks = [] python_code = open(module_file).read() # take a hash of the code so we can keep track of when it changes @@ -380,6 +386,11 @@ def preload_module(self, module_file): # ansible playbook elif any([target.id == "deps_ansible" for target in class_attr.targets]): ansible_tasks = ast.literal_eval(class_attr.value) + # shared/common module dependencies + if any([target.id == "deps_common" for target in class_attr.targets]): + for dep_common in class_attr.value.elts: + if type(dep_common.value) == str: + deps_common.append(dep_common.value) for task in ansible_tasks: if not "become" in task: @@ -403,13 +414,24 @@ def preload_module(self, module_file): "shell": deps_shell, "apt": deps_apt, "ansible": ansible_tasks, + "common": deps_common, }, "sudo": len(deps_apt) > 0, } - if any(x == True for x in search_dict_by_key("become", ansible_tasks)) or any( - x == True for x in search_dict_by_key("ansible_become", ansible_tasks) - ): - preloaded_data["sudo"] = True + ansible_task_list = list(ansible_tasks) + for dep_common in deps_common: + try: + ansible_task_list.extend(self._shared_deps[dep_common]) + except KeyError: + common_choices = ",".join(self._shared_deps) + raise BBOTError( + f'Error while preloading module "{module_file}": No shared dependency named "{dep_common}" (choices: {common_choices})' + 
) + for ansible_task in ansible_task_list: + if any(x == True for x in search_dict_by_key("become", ansible_task)) or any( + x == True for x in search_dict_by_key("ansible_become", ansible_tasks) + ): + preloaded_data["sudo"] = True return preloaded_data def load_modules(self, module_names): diff --git a/bbot/core/shared_deps.py b/bbot/core/shared_deps.py new file mode 100644 index 0000000000..c7d83c4a71 --- /dev/null +++ b/bbot/core/shared_deps.py @@ -0,0 +1,95 @@ +DEP_FFUF = [ + { + "name": "Download ffuf", + "unarchive": { + "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_DEPS_FFUF_VERSION}/ffuf_#{BBOT_DEPS_FFUF_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH}.tar.gz", + "include": "ffuf", + "dest": "#{BBOT_TOOLS}", + "remote_src": True, + }, + } +] + +DEP_MASSDNS = [ + { + "name": "install dev tools", + "package": {"name": ["gcc", "git", "make"], "state": "present"}, + "become": True, + "ignore_errors": True, + }, + { + "name": "Download massdns source code", + "git": { + "repo": "https://github.com/blechschmidt/massdns.git", + "dest": "#{BBOT_TEMP}/massdns", + "single_branch": True, + "version": "master", + }, + }, + { + "name": "Build massdns (Linux)", + "command": {"chdir": "#{BBOT_TEMP}/massdns", "cmd": "make", "creates": "#{BBOT_TEMP}/massdns/bin/massdns"}, + "when": "ansible_facts['system'] == 'Linux'", + }, + { + "name": "Build massdns (non-Linux)", + "command": { + "chdir": "#{BBOT_TEMP}/massdns", + "cmd": "make nolinux", + "creates": "#{BBOT_TEMP}/massdns/bin/massdns", + }, + "when": "ansible_facts['system'] != 'Linux'", + }, + { + "name": "Install massdns", + "copy": {"src": "#{BBOT_TEMP}/massdns/bin/massdns", "dest": "#{BBOT_TOOLS}/", "mode": "u+x,g+x,o+x"}, + }, +] + +DEP_CHROMIUM = [ + { + "name": "Install Chromium (Non-Debian)", + "package": {"name": "chromium", "state": "present"}, + "become": True, + "when": "ansible_facts['os_family'] != 'Debian'", + "ignore_errors": True, + }, + { + "name": "Install Chromium dependencies (Debian)", + 
"package": { + "name": "libasound2,libatk-bridge2.0-0,libatk1.0-0,libcairo2,libcups2,libdrm2,libgbm1,libnss3,libpango-1.0-0,libxcomposite1,libxdamage1,libxfixes3,libxkbcommon0,libxrandr2", + "state": "present", + }, + "become": True, + "when": "ansible_facts['os_family'] == 'Debian'", + "ignore_errors": True, + }, + { + "name": "Get latest Chromium version (Debian)", + "uri": { + "url": "https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2FLAST_CHANGE?alt=media", + "return_content": True, + }, + "register": "chromium_version", + "when": "ansible_facts['os_family'] == 'Debian'", + "ignore_errors": True, + }, + { + "name": "Download Chromium (Debian)", + "unarchive": { + "src": "https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2F{{ chromium_version.content }}%2Fchrome-linux.zip?alt=media", + "remote_src": True, + "dest": "#{BBOT_TOOLS}", + "creates": "#{BBOT_TOOLS}/chrome-linux", + }, + "when": "ansible_facts['os_family'] == 'Debian'", + "ignore_errors": True, + }, +] + +# shared module dependencies -- ffuf, massdns, chromium, etc. 
+SHARED_DEPS = {} +for var, val in list(locals().items()): + if var.startswith("DEP_") and isinstance(val, list): + var = var.split("_", 1)[-1].lower() + SHARED_DEPS[var] = val diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 9eefb838b7..4b9b5210d1 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -19,6 +19,11 @@ http_proxy: # Web user-agent user_agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.2151.97 +# Tool dependencies +deps: + ffuf: + version: "2.1.0" + ### WEB SPIDER ### # Set the maximum number of HTTP links that can be followed in a row (0 == no spidering allowed) diff --git a/bbot/modules/deadly/ffuf.py b/bbot/modules/deadly/ffuf.py index 8382f1e669..a56c735062 100644 --- a/bbot/modules/deadly/ffuf.py +++ b/bbot/modules/deadly/ffuf.py @@ -17,7 +17,6 @@ class ffuf(BaseModule): "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/Web-Content/raft-small-directories.txt", "lines": 5000, "max_depth": 0, - "version": "2.0.0", "extensions": "", } @@ -25,21 +24,10 @@ class ffuf(BaseModule): "wordlist": "Specify wordlist to use when finding directories", "lines": "take only the first N lines from the wordlist when finding directories", "max_depth": "the maximum directory depth to attempt to solve", - "version": "ffuf version", "extensions": "Optionally include a list of extensions to extend the keyword with (comma separated)", } - deps_ansible = [ - { - "name": "Download ffuf", - "unarchive": { - "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH}.tar.gz", - "include": "ffuf", - "dest": "#{BBOT_TOOLS}", - "remote_src": True, - }, - } - ] + deps_common = ["ffuf"] banned_characters = [" "] diff --git a/bbot/modules/ffuf_shortnames.py b/bbot/modules/ffuf_shortnames.py index ca02da8862..cfc58cba40 100644 --- a/bbot/modules/ffuf_shortnames.py +++ 
b/bbot/modules/ffuf_shortnames.py @@ -59,17 +59,7 @@ class ffuf_shortnames(ffuf): "find_delimiters": "Attempt to detect common delimiters and make additional ffuf runs against them", } - deps_ansible = [ - { - "name": "Download ffuf", - "unarchive": { - "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_#{BBOT_OS_PLATFORM}_#{BBOT_CPU_ARCH}.tar.gz", - "include": "ffuf", - "dest": "#{BBOT_TOOLS}", - "remote_src": True, - }, - } - ] + deps_common = ["ffuf"] in_scope_only = True diff --git a/bbot/modules/gowitness.py b/bbot/modules/gowitness.py index 3271ef93fd..ea8663bb77 100644 --- a/bbot/modules/gowitness.py +++ b/bbot/modules/gowitness.py @@ -29,45 +29,8 @@ class gowitness(BaseModule): "output_path": "where to save screenshots", "social": "Whether to screenshot social media webpages", } + deps_common = ["chromium"] deps_ansible = [ - { - "name": "Install Chromium (Non-Debian)", - "package": {"name": "chromium", "state": "present"}, - "become": True, - "when": "ansible_facts['os_family'] != 'Debian'", - "ignore_errors": True, - }, - { - "name": "Install Chromium dependencies (Debian)", - "package": { - "name": "libasound2,libatk-bridge2.0-0,libatk1.0-0,libcairo2,libcups2,libdrm2,libgbm1,libnss3,libpango-1.0-0,libxcomposite1,libxdamage1,libxfixes3,libxkbcommon0,libxrandr2", - "state": "present", - }, - "become": True, - "when": "ansible_facts['os_family'] == 'Debian'", - "ignore_errors": True, - }, - { - "name": "Get latest Chromium version (Debian)", - "uri": { - "url": "https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2FLAST_CHANGE?alt=media", - "return_content": True, - }, - "register": "chromium_version", - "when": "ansible_facts['os_family'] == 'Debian'", - "ignore_errors": True, - }, - { - "name": "Download Chromium (Debian)", - "unarchive": { - "src": "https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2F{{ 
chromium_version.content }}%2Fchrome-linux.zip?alt=media", - "remote_src": True, - "dest": "#{BBOT_TOOLS}", - "creates": "#{BBOT_TOOLS}/chrome-linux", - }, - "when": "ansible_facts['os_family'] == 'Debian'", - "ignore_errors": True, - }, { "name": "Download gowitness", "get_url": { diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index cad536dfdb..540b840418 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -38,41 +38,7 @@ class massdns(subdomain_enum): "max_depth": "How many subdomains deep to brute force, i.e. 5.4.3.2.1.evilcorp.com", } subdomain_file = None - deps_ansible = [ - { - "name": "install dev tools", - "package": {"name": ["gcc", "git", "make"], "state": "present"}, - "become": True, - "ignore_errors": True, - }, - { - "name": "Download massdns source code", - "git": { - "repo": "https://github.com/blechschmidt/massdns.git", - "dest": "#{BBOT_TEMP}/massdns", - "single_branch": True, - "version": "master", - }, - }, - { - "name": "Build massdns (Linux)", - "command": {"chdir": "#{BBOT_TEMP}/massdns", "cmd": "make", "creates": "#{BBOT_TEMP}/massdns/bin/massdns"}, - "when": "ansible_facts['system'] == 'Linux'", - }, - { - "name": "Build massdns (non-Linux)", - "command": { - "chdir": "#{BBOT_TEMP}/massdns", - "cmd": "make nolinux", - "creates": "#{BBOT_TEMP}/massdns/bin/massdns", - }, - "when": "ansible_facts['system'] != 'Linux'", - }, - { - "name": "Install massdns", - "copy": {"src": "#{BBOT_TEMP}/massdns/bin/massdns", "dest": "#{BBOT_TOOLS}/", "mode": "u+x,g+x,o+x"}, - }, - ] + deps_common = ["massdns"] reject_wildcards = "strict" _qsize = 10000 From 307aa11518bdece807eb4b91753b10284db70bfe Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 16 Apr 2024 17:53:49 -0400 Subject: [PATCH 40/63] add docker shared dependency --- bbot/core/shared_deps.py | 24 ++++++++++++++++++++++++ bbot/modules/deadly/dastardly.py | 24 +----------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git 
a/bbot/core/shared_deps.py b/bbot/core/shared_deps.py index c7d83c4a71..7511177527 100644 --- a/bbot/core/shared_deps.py +++ b/bbot/core/shared_deps.py @@ -10,6 +10,30 @@ } ] +DEP_DOCKER = [ + { + "name": "Check if Docker is already installed", + "command": "docker --version", + "register": "docker_installed", + "ignore_errors": True, + }, + { + "name": "Install Docker (Non-Debian)", + "package": {"name": "docker", "state": "present"}, + "become": True, + "when": "ansible_facts['os_family'] != 'Debian' and docker_installed.rc != 0", + }, + { + "name": "Install Docker (Debian)", + "package": { + "name": "docker.io", + "state": "present", + }, + "become": True, + "when": "ansible_facts['os_family'] == 'Debian' and docker_installed.rc != 0", + }, +] + DEP_MASSDNS = [ { "name": "install dev tools", diff --git a/bbot/modules/deadly/dastardly.py b/bbot/modules/deadly/dastardly.py index c419f67d93..2bfd20f4ad 100644 --- a/bbot/modules/deadly/dastardly.py +++ b/bbot/modules/deadly/dastardly.py @@ -9,29 +9,7 @@ class dastardly(BaseModule): meta = {"description": "Lightweight web application security scanner"} deps_pip = ["lxml~=4.9.2"] - deps_ansible = [ - { - "name": "Check if Docker is already installed", - "command": "docker --version", - "register": "docker_installed", - "ignore_errors": True, - }, - { - "name": "Install Docker (Non-Debian)", - "package": {"name": "docker", "state": "present"}, - "become": True, - "when": "ansible_facts['os_family'] != 'Debian' and docker_installed.rc != 0", - }, - { - "name": "Install Docker (Debian)", - "package": { - "name": "docker.io", - "state": "present", - }, - "become": True, - "when": "ansible_facts['os_family'] == 'Debian' and docker_installed.rc != 0", - }, - ] + deps_common = ["docker"] per_hostport_only = True async def setup(self): From 22f877ec476d16531d4989465b45c43190549b32 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 16 Apr 2024 17:59:21 -0400 Subject: [PATCH 41/63] hook --> intercept --- 
bbot/modules/base.py | 8 ++++---- bbot/scanner/manager.py | 18 +++++++++--------- bbot/scanner/scanner.py | 18 +++++++++--------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 26332aca68..c102b138d4 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -111,7 +111,7 @@ class BaseModule: _priority = 3 _name = "base" _type = "scan" - _hook = False + _intercept = False def __init__(self, scan): """Initializes a module instance. @@ -1415,7 +1415,7 @@ class InterceptModule(BaseModule): accept_dupes = True suppress_dupes = False - _hook = True + _intercept = True async def _worker(self): async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): @@ -1491,7 +1491,7 @@ async def get_incoming_event(self): async def forward_event(self, event, kwargs): """ - Used for forwarding the event on to the next hook module + Used for forwarding the event on to the next intercept module """ await self.outgoing_event_queue.put((event, kwargs)) @@ -1500,7 +1500,7 @@ async def queue_outgoing_event(self, event, **kwargs): Used by emit_event() to raise new events to the scan """ # if this was a normal module, we'd put it in the outgoing queue - # but because it's a hook module, we need to queue it with the first hook module + # but because it's a intercept module, we need to queue it with the first intercept module await self.scan.ingress_module.queue_event(event, kwargs) async def queue_event(self, event, kwargs=None): diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index dd01d5879d..76d7b6028b 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -27,7 +27,7 @@ def priority(self): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._module_priority_weights = None - self._non_hook_modules = None + self._non_intercept_modules = None # track incoming duplicates module-by-module (for `suppress_dupes` attribute of modules) 
self.incoming_dup_tracker = set() @@ -95,20 +95,20 @@ async def forward_event(self, event, kwargs): await super().forward_event(event, kwargs) @property - def non_hook_modules(self): - if self._non_hook_modules is None: - self._non_hook_modules = [m for m in self.scan.modules.values() if not m._hook] - return self._non_hook_modules + def non_intercept_modules(self): + if self._non_intercept_modules is None: + self._non_intercept_modules = [m for m in self.scan.modules.values() if not m._intercept] + return self._non_intercept_modules @property def incoming_queues(self): - return [self.incoming_event_queue] + [m.outgoing_event_queue for m in self.non_hook_modules] + return [self.incoming_event_queue] + [m.outgoing_event_queue for m in self.non_intercept_modules] @property def module_priority_weights(self): if not self._module_priority_weights: # we subtract from six because lower priorities == higher weights - priorities = [5] + [6 - m.priority for m in self.non_hook_modules] + priorities = [5] + [6 - m.priority for m in self.non_intercept_modules] self._module_priority_weights = priorities return self._module_priority_weights @@ -213,8 +213,8 @@ async def forward_event(self, event, kwargs): self.scan.word_cloud.absorb_event(event) for mod in self.scan.modules.values(): - # don't distribute events to hook modules - if mod._hook: + # don't distribute events to intercept modules + if mod._intercept: continue acceptable_dup = (not is_outgoing_duplicate) or mod.accept_dupes graph_important = mod._is_graph_important(event) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 4a8ebf0f6a..36428cc533 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -252,14 +252,14 @@ async def _prep(self): # run each module's .setup() method succeeded, hard_failed, soft_failed = await self.setup_modules() - # hook modules get sewn together like human centipede - self.hook_modules = [m for m in self.modules.values() if m._hook] - for i, hook_module in 
enumerate(self.hook_modules[:-1]): - next_hook_module = self.hook_modules[i + 1] + # intercept modules get sewn together like human centipede + self.intercept_modules = [m for m in self.modules.values() if m._intercept] + for i, intercept_module in enumerate(self.intercept_modules[:-1]): + next_intercept_module = self.intercept_modules[i + 1] self.debug( - f"Setting hook module {hook_module.name}.outgoing_event_queue to next hook module {next_hook_module.name}.incoming_event_queue" + f"Setting intercept module {intercept_module.name}.outgoing_event_queue to next intercept module {next_intercept_module.name}.incoming_event_queue" ) - hook_module._outgoing_event_queue = next_hook_module.incoming_event_queue + intercept_module._outgoing_event_queue = next_intercept_module.incoming_event_queue # abort if there are no output modules num_output_modules = len([m for m in self.modules.values() if m._type == "output"]) @@ -428,8 +428,8 @@ async def setup_modules(self, remove_failed=True): else: self.info(f"Setup soft-failed for {module.name}: {msg}") soft_failed.append(module.name) - if (not status) and (module._hook or remove_failed): - # if a hook module fails setup, we always remove it + if (not status) and (module._intercept or remove_failed): + # if a intercept module fails setup, we always remove it self.modules.pop(module.name) return succeeded, hard_failed, soft_failed @@ -514,7 +514,7 @@ async def load_modules(self): f"Loaded {len(loaded_output_modules):,}/{len(self.preset.output_modules):,} output modules, ({','.join(loaded_output_modules)})" ) - # builtin hook modules + # builtin intercept modules self.ingress_module = ScanIngress(self) self.egress_module = ScanEgress(self) self.modules[self.ingress_module.name] = self.ingress_module From 3df1d638a484dab0eca20e25d01d139117614199 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 17 Apr 2024 13:52:15 -0400 Subject: [PATCH 42/63] fix inconsistency with dns host speculation --- bbot/modules/internal/dns.py | 
21 +++++++++++++++------ bbot/modules/output/neo4j.py | 7 +++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index c3db74891e..baaefc9144 100644 --- a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -3,9 +3,9 @@ from cachetools import LRUCache from bbot.errors import ValidationError -from bbot.modules.base import InterceptModule from bbot.core.helpers.dns.engine import all_rdtypes from bbot.core.helpers.async_helpers import NamedLock +from bbot.modules.base import InterceptModule, BaseModule class DNS(InterceptModule): @@ -16,6 +16,13 @@ class DNS(InterceptModule): _max_event_handlers = 25 scope_distance_modifier = None + class HostModule(BaseModule): + _name = "host" + _type = "internal" + + def _outgoing_dedup_hash(self, event): + return hash((event, self.name, event.always_emit)) + async def setup(self): self.dns_resolution = True # you can disable DNS resolution with either the "dns" or "dns_resolution" config options @@ -29,6 +36,8 @@ async def setup(self): self._event_cache = LRUCache(maxsize=10000) self._event_cache_locks = NamedLock() + self.host_module = self.HostModule(self.scan) + return True @property @@ -138,7 +147,7 @@ async def handle_event(self, event, kwargs): self.debug(f"Making {event} in-scope because it resolves to an in-scope resource") event.scope_distance = 0 - # check for wildcards, only if the event resolves to something isn't an IP + # check for wildcards, only if the event resolves to something that isn't an IP if (not event_is_ip) and (dns_children): if event.scope_distance <= self.scan.scope_search_distance: await self.handle_wildcard_event(event) @@ -165,12 +174,12 @@ async def handle_event(self, event, kwargs): if ( event.host and event.type not in ("DNS_NAME", "DNS_NAME_UNRESOLVED", "IP_ADDRESS", "IP_RANGE") - and not (event.type in ("OPEN_TCP_PORT", "URL_UNVERIFIED") and str(event.module) == "speculate") + and not ((event.type in 
("OPEN_TCP_PORT", "URL_UNVERIFIED") and str(event.module) == "speculate")) ): - source_module = self.scan._make_dummy_module("host", _type="internal") - source_event = self.scan.make_event(event.host, "DNS_NAME", module=source_module, source=event) + source_event = self.scan.make_event(event.host, "DNS_NAME", module=self.host_module, source=event) # only emit the event if it's not already in the parent chain - if source_event is not None and source_event not in event.get_sources(): + if source_event is not None and (source_event.always_emit or source_event not in event.get_sources()): + self.critical(f"SPECULATING {event.host} FROM {event}") source_event.scope_distance = event.scope_distance if "target" in event.tags: source_event.add_tag("target") diff --git a/bbot/modules/output/neo4j.py b/bbot/modules/output/neo4j.py index 2cc0835443..2b0548ea9d 100644 --- a/bbot/modules/output/neo4j.py +++ b/bbot/modules/output/neo4j.py @@ -1,3 +1,4 @@ +from contextlib import suppress from neo4j import AsyncGraphDatabase from bbot.modules.output.base import BaseOutputModule @@ -78,5 +79,7 @@ async def merge_event(self, event, id_only=False): return (await result.single()).get("id(_)") async def cleanup(self): - await self.session.close() - await self.driver.close() + with suppress(Exception): + await self.session.close() + with suppress(Exception): + await self.driver.close() From 378ee8ac394da75623d17ff30108e099830b8f8b Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 17 Apr 2024 14:08:03 -0400 Subject: [PATCH 43/63] fix tests --- bbot/core/modules.py | 16 ++++++++-------- bbot/modules/deadly/vhost.py | 12 +----------- bbot/test/test_step_1/test_modules_basic.py | 3 ++- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/bbot/core/modules.py b/bbot/core/modules.py index 508836b2a4..b9ae83af5d 100644 --- a/bbot/core/modules.py +++ b/bbot/core/modules.py @@ -336,10 +336,10 @@ def preload_module(self, module_file): if any([target.id == "options" for target in 
class_attr.targets]): config.update(ast.literal_eval(class_attr.value)) # module options - if any([target.id == "options_desc" for target in class_attr.targets]): + elif any([target.id == "options_desc" for target in class_attr.targets]): options_desc.update(ast.literal_eval(class_attr.value)) # module metadata - if any([target.id == "meta" for target in class_attr.targets]): + elif any([target.id == "meta" for target in class_attr.targets]): meta = ast.literal_eval(class_attr.value) # class attributes that are lists @@ -350,27 +350,27 @@ def preload_module(self, module_file): if type(flag.value) == str: flags.add(flag.value) # watched events - if any([target.id == "watched_events" for target in class_attr.targets]): + elif any([target.id == "watched_events" for target in class_attr.targets]): for event_type in class_attr.value.elts: if type(event_type.value) == str: watched_events.add(event_type.value) # produced events - if any([target.id == "produced_events" for target in class_attr.targets]): + elif any([target.id == "produced_events" for target in class_attr.targets]): for event_type in class_attr.value.elts: if type(event_type.value) == str: produced_events.add(event_type.value) # bbot module dependencies - if any([target.id == "deps_modules" for target in class_attr.targets]): + elif any([target.id == "deps_modules" for target in class_attr.targets]): for dep_module in class_attr.value.elts: if type(dep_module.value) == str: deps_modules.add(dep_module.value) # python dependencies - if any([target.id == "deps_pip" for target in class_attr.targets]): + elif any([target.id == "deps_pip" for target in class_attr.targets]): for dep_pip in class_attr.value.elts: if type(dep_pip.value) == str: deps_pip.append(dep_pip.value) - if any([target.id == "deps_pip_constraints" for target in class_attr.targets]): + elif any([target.id == "deps_pip_constraints" for target in class_attr.targets]): for dep_pip in class_attr.value.elts: if type(dep_pip.value) == str: 
deps_pip_constraints.append(dep_pip.value) @@ -387,7 +387,7 @@ def preload_module(self, module_file): elif any([target.id == "deps_ansible" for target in class_attr.targets]): ansible_tasks = ast.literal_eval(class_attr.value) # shared/common module dependencies - if any([target.id == "deps_common" for target in class_attr.targets]): + elif any([target.id == "deps_common" for target in class_attr.targets]): for dep_common in class_attr.value.elts: if type(dep_common.value) == str: deps_common.append(dep_common.value) diff --git a/bbot/modules/deadly/vhost.py b/bbot/modules/deadly/vhost.py index e2908dbbe4..cf7be1f678 100644 --- a/bbot/modules/deadly/vhost.py +++ b/bbot/modules/deadly/vhost.py @@ -22,17 +22,7 @@ class vhost(ffuf): "lines": "take only the first N lines from the wordlist when finding directories", } - deps_ansible = [ - { - "name": "Download ffuf", - "unarchive": { - "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_MODULES_FFUF_VERSION}/ffuf_#{BBOT_MODULES_FFUF_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH}.tar.gz", - "include": "ffuf", - "dest": "#{BBOT_TOOLS}", - "remote_src": True, - }, - } - ] + deps_common = ["ffuf"] in_scope_only = True diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 5fc187fe3a..03273c0a70 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -108,7 +108,8 @@ async def test_modules_basic(scan, helpers, events, bbot_scanner, httpx_mock): assert type(all_preloaded["massdns"]["config"]["max_resolvers"]) == int assert all_preloaded["sslcert"]["deps"]["pip"] assert all_preloaded["sslcert"]["deps"]["apt"] - assert all_preloaded["massdns"]["deps"]["ansible"] + assert all_preloaded["massdns"]["deps"]["common"] + assert all_preloaded["gowitness"]["deps"]["ansible"] all_flags = set() From d6511debc3db1f6d1a29685abc7727f6aae1efdf Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 17 Apr 2024 14:09:38 -0400 
Subject: [PATCH 44/63] update poetry.lock --- poetry.lock | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index bdb3e03a1b..0a89ff3bcf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1978,6 +1978,111 @@ files = [ [package.dependencies] pyyaml = "*" +[[package]] +name = "pyzmq" +version = "25.1.2" +description = "Python bindings for 0MQ" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4"}, + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49151b0efece79f6a79d41a461d78535356136ee70084a1c22532fc6383f4ad0"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9a5f194cf730f2b24d6af1f833c14c10f41023da46a7f736f48b6d35061e76e"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:faf79a302f834d9e8304fafdc11d0d042266667ac45209afa57e5efc998e3872"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f51a7b4ead28d3fca8dda53216314a553b0f7a91ee8fc46a72b402a78c3e43d"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0ddd6d71d4ef17ba5a87becf7ddf01b371eaba553c603477679ae817a8d84d75"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:246747b88917e4867e2367b005fc8eefbb4a54b7db363d6c92f89d69abfff4b6"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:00c48ae2fd81e2a50c3485de1b9d5c7c57cd85dc8ec55683eac16846e57ac979"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a68d491fc20762b630e5db2191dd07ff89834086740f70e978bb2ef2668be08"}, + {file = "pyzmq-25.1.2-cp310-cp310-win32.whl", hash = 
"sha256:09dfe949e83087da88c4a76767df04b22304a682d6154de2c572625c62ad6886"}, + {file = "pyzmq-25.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:fa99973d2ed20417744fca0073390ad65ce225b546febb0580358e36aa90dba6"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:82544e0e2d0c1811482d37eef297020a040c32e0687c1f6fc23a75b75db8062c"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:01171fc48542348cd1a360a4b6c3e7d8f46cdcf53a8d40f84db6707a6768acc1"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc69c96735ab501419c432110016329bf0dea8898ce16fab97c6d9106dc0b348"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3e124e6b1dd3dfbeb695435dff0e383256655bb18082e094a8dd1f6293114642"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7598d2ba821caa37a0f9d54c25164a4fa351ce019d64d0b44b45540950458840"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d1299d7e964c13607efd148ca1f07dcbf27c3ab9e125d1d0ae1d580a1682399d"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4e6f689880d5ad87918430957297c975203a082d9a036cc426648fcbedae769b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cc69949484171cc961e6ecd4a8911b9ce7a0d1f738fcae717177c231bf77437b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9880078f683466b7f567b8624bfc16cad65077be046b6e8abb53bed4eeb82dd3"}, + {file = "pyzmq-25.1.2-cp311-cp311-win32.whl", hash = "sha256:4e5837af3e5aaa99a091302df5ee001149baff06ad22b722d34e30df5f0d9097"}, + {file = "pyzmq-25.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:25c2dbb97d38b5ac9fd15586e048ec5eb1e38f3d47fe7d92167b0c77bb3584e9"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_15_universal2.whl", hash = 
"sha256:11e70516688190e9c2db14fcf93c04192b02d457b582a1f6190b154691b4c93a"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:313c3794d650d1fccaaab2df942af9f2c01d6217c846177cfcbc693c7410839e"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3cbba2f47062b85fe0ef9de5b987612140a9ba3a9c6d2543c6dec9f7c2ab27"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc31baa0c32a2ca660784d5af3b9487e13b61b3032cb01a115fce6588e1bed30"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c9087b109070c5ab0b383079fa1b5f797f8d43e9a66c07a4b8b8bdecfd88ee"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f8429b17cbb746c3e043cb986328da023657e79d5ed258b711c06a70c2ea7537"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5074adeacede5f810b7ef39607ee59d94e948b4fd954495bdb072f8c54558181"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7ae8f354b895cbd85212da245f1a5ad8159e7840e37d78b476bb4f4c3f32a9fe"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b264bf2cc96b5bc43ce0e852be995e400376bd87ceb363822e2cb1964fcdc737"}, + {file = "pyzmq-25.1.2-cp312-cp312-win32.whl", hash = "sha256:02bbc1a87b76e04fd780b45e7f695471ae6de747769e540da909173d50ff8e2d"}, + {file = "pyzmq-25.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:ced111c2e81506abd1dc142e6cd7b68dd53747b3b7ae5edbea4578c5eeff96b7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7b6d09a8962a91151f0976008eb7b29b433a560fde056ec7a3db9ec8f1075438"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967668420f36878a3c9ecb5ab33c9d0ff8d054f9c0233d995a6d25b0e95e1b6b"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:5edac3f57c7ddaacdb4d40f6ef2f9e299471fc38d112f4bc6d60ab9365445fb0"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0dabfb10ef897f3b7e101cacba1437bd3a5032ee667b7ead32bbcdd1a8422fe7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2c6441e0398c2baacfe5ba30c937d274cfc2dc5b55e82e3749e333aabffde561"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:16b726c1f6c2e7625706549f9dbe9b06004dfbec30dbed4bf50cbdfc73e5b32a"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:a86c2dd76ef71a773e70551a07318b8e52379f58dafa7ae1e0a4be78efd1ff16"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win32.whl", hash = "sha256:359f7f74b5d3c65dae137f33eb2bcfa7ad9ebefd1cab85c935f063f1dbb245cc"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:55875492f820d0eb3417b51d96fea549cde77893ae3790fd25491c5754ea2f68"}, + {file = "pyzmq-25.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8c8a419dfb02e91b453615c69568442e897aaf77561ee0064d789705ff37a92"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8807c87fa893527ae8a524c15fc505d9950d5e856f03dae5921b5e9aa3b8783b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5e319ed7d6b8f5fad9b76daa0a68497bc6f129858ad956331a5835785761e003"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3c53687dde4d9d473c587ae80cc328e5b102b517447456184b485587ebd18b62"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9add2e5b33d2cd765ad96d5eb734a5e795a0755f7fc49aa04f76d7ddda73fd70"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e690145a8c0c273c28d3b89d6fb32c45e0d9605b2293c10e650265bf5c11cfec"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win32.whl", hash = "sha256:0f97bc2f1f13cb16905a5f3e1fbdf100e712d841482b2237484360f8bc4cb3d7"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6cc0020b74b2e410287e5942e1e10886ff81ac77789eb20bec13f7ae681f0fdd"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:bef02cfcbded83473bdd86dd8d3729cd82b2e569b75844fb4ea08fee3c26ae41"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e10a4b5a4b1192d74853cc71a5e9fd022594573926c2a3a4802020360aa719d8"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8c5f80e578427d4695adac6fdf4370c14a2feafdc8cb35549c219b90652536ae"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5dde6751e857910c1339890f3524de74007958557593b9e7e8c5f01cd919f8a7"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea1608dd169da230a0ad602d5b1ebd39807ac96cae1845c3ceed39af08a5c6df"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0f513130c4c361201da9bc69df25a086487250e16b5571ead521b31ff6b02220"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:019744b99da30330798bb37df33549d59d380c78e516e3bab9c9b84f87a9592f"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2e2713ef44be5d52dd8b8e2023d706bf66cb22072e97fc71b168e01d25192755"}, + {file = "pyzmq-25.1.2-cp38-cp38-win32.whl", hash = "sha256:07cd61a20a535524906595e09344505a9bd46f1da7a07e504b315d41cd42eb07"}, + {file = "pyzmq-25.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb7e49a17fb8c77d3119d41a4523e432eb0c6932187c37deb6fbb00cc3028088"}, + {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:94504ff66f278ab4b7e03e4cba7e7e400cb73bfa9d3d71f58d8972a8dc67e7a6"}, + {file = 
"pyzmq-25.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6dd0d50bbf9dca1d0bdea219ae6b40f713a3fb477c06ca3714f208fd69e16fd8"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c0b5ca88a8928147b7b1e2dfa09f3b6c256bc1135a1338536cbc9ea13d3b7add"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9a79f1d2495b167119d02be7448bfba57fad2a4207c4f68abc0bab4b92925b"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:518efd91c3d8ac9f9b4f7dd0e2b7b8bf1a4fe82a308009016b07eaa48681af82"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1ec23bd7b3a893ae676d0e54ad47d18064e6c5ae1fadc2f195143fb27373f7f6"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db36c27baed588a5a8346b971477b718fdc66cf5b80cbfbd914b4d6d355e44e2"}, + {file = "pyzmq-25.1.2-cp39-cp39-win32.whl", hash = "sha256:39b1067f13aba39d794a24761e385e2eddc26295826530a8c7b6c6c341584289"}, + {file = "pyzmq-25.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:8e9f3fabc445d0ce320ea2c59a75fe3ea591fdbdeebec5db6de530dd4b09412e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a8c1d566344aee826b74e472e16edae0a02e2a044f14f7c24e123002dcff1c05"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:759cfd391a0996345ba94b6a5110fca9c557ad4166d86a6e81ea526c376a01e8"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c61e346ac34b74028ede1c6b4bcecf649d69b707b3ff9dc0fab453821b04d1e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cb8fc1f8d69b411b8ec0b5f1ffbcaf14c1db95b6bccea21d83610987435f1a4"}, + 
{file = "pyzmq-25.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3c00c9b7d1ca8165c610437ca0c92e7b5607b2f9076f4eb4b095c85d6e680a1d"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:df0c7a16ebb94452d2909b9a7b3337940e9a87a824c4fc1c7c36bb4404cb0cde"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:45999e7f7ed5c390f2e87ece7f6c56bf979fb213550229e711e45ecc7d42ccb8"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ac170e9e048b40c605358667aca3d94e98f604a18c44bdb4c102e67070f3ac9b"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b604734bec94f05f81b360a272fc824334267426ae9905ff32dc2be433ab96"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a793ac733e3d895d96f865f1806f160696422554e46d30105807fdc9841b9f7d"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0806175f2ae5ad4b835ecd87f5f85583316b69f17e97786f7443baaf54b9bb98"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ef12e259e7bc317c7597d4f6ef59b97b913e162d83b421dd0db3d6410f17a244"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea253b368eb41116011add00f8d5726762320b1bda892f744c91997b65754d73"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b9b1f2ad6498445a941d9a4fee096d387fee436e45cc660e72e768d3d8ee611"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8b14c75979ce932c53b79976a395cb2a8cd3aaf14aef75e8c2cb55a330b9b49d"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:889370d5174a741a62566c003ee8ddba4b04c3f09a97b8000092b7ca83ec9c49"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9a18fff090441a40ffda8a7f4f18f03dc56ae73f148f1832e109f9bffa85df15"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99a6b36f95c98839ad98f8c553d8507644c880cf1e0a57fe5e3a3f3969040882"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4345c9a27f4310afbb9c01750e9461ff33d6fb74cd2456b107525bbeebcb5be3"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3516e0b6224cf6e43e341d56da15fd33bdc37fa0c06af4f029f7d7dfceceabbc"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:146b9b1f29ead41255387fb07be56dc29639262c0f7344f570eecdcd8d683314"}, + {file = "pyzmq-25.1.2.tar.gz", hash = "sha256:93f1aa311e8bb912e34f004cf186407a4e90eec4f0ecc0efd26056bf7eda0226"}, +] + +[package.dependencies] +cffi = {version = "*", markers = "implementation_name == \"pypy\""} + [[package]] name = "regex" version = "2024.4.16" @@ -2508,4 +2613,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "2d76a3fa67dcc00e96922a7fcc640f7ba7605ab29c260896587521f258a0b9d0" +content-hash = "daf56ec78cff336b530e48e83f0a854c3356a802e5e0cf9456b7a5adfe962354" From 36e39751f82140cf3b23d48561f4ac8e3fc902c4 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 17 Apr 2024 17:19:28 -0400 Subject: [PATCH 45/63] WIP regex optimization --- bbot/core/helpers/helper.py | 71 +++++++++++++++++++++++++++++++ bbot/core/helpers/regexes.py | 2 +- bbot/modules/badsecrets.py | 2 +- bbot/modules/bevigil.py | 2 +- bbot/modules/internal/excavate.py | 13 +++--- bbot/modules/secretsdb.py | 2 +- bbot/modules/sslcert.py | 2 +- bbot/modules/wafw00f.py | 6 +-- bbot/modules/wappalyzer.py | 4 +- bbot/modules/wayback.py | 2 +- bbot/scanner/scanner.py | 44 +------------------ poetry.lock | 2 +- pyproject.toml | 1 + 13 files changed, 92 insertions(+), 61 deletions(-) 
diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index d2bb4bb194..ecf7c7cfc6 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -1,7 +1,12 @@ import os +import asyncio import logging +import regex as re from pathlib import Path +import multiprocessing as mp +from functools import partial from cloudcheck import cloud_providers +from concurrent.futures import ProcessPoolExecutor from . import misc from .dns import DNSHelper @@ -65,6 +70,18 @@ def __init__(self, preset): self.mkdir(self.tools_dir) self.mkdir(self.lib_dir) + self._loop = None + + # multiprocessing thread pool + start_method = mp.get_start_method() + if start_method != "spawn": + self.warning(f"Multiprocessing spawn method is set to {start_method}.") + + # we spawn 1 fewer processes than cores + # this helps to avoid locking up the system or competing with the main python process for cpu time + num_processes = max(1, mp.cpu_count() - 1) + self.process_pool = ProcessPoolExecutor(max_workers=num_processes) + self.cloud = cloud_providers self.dns = DNSHelper(self) @@ -73,6 +90,28 @@ def __init__(self, preset): self.word_cloud = WordCloud(self) self.dummy_modules = {} + def ensure_compiled_regex(self, r): + """ + Make sure a regex has been compiled + """ + if not isinstance(r, re.Pattern): + raise ValueError("Regex must be compiled first!") + + async def re_search(self, compiled_regex, *args, **kwargs): + self.ensure_compiled_regex(compiled_regex) + return await self.run_in_executor(compiled_regex.search, *args, **kwargs) + + async def re_findall(self, compiled_regex, *args, **kwargs): + self.ensure_compiled_regex(compiled_regex) + return await self.run_in_executor(compiled_regex.findall, *args, **kwargs) + + async def re_finditer(self, compiled_regex, *args, **kwargs): + self.ensure_compiled_regex(compiled_regex) + return await self.run_in_executor(self._re_finditer, compiled_regex, *args, **kwargs) + + def _re_finditer(self, compiled_regex, *args, 
**kwargs): + return list(compiled_regex.finditer(*args, **kwargs)) + def interactsh(self, *args, **kwargs): return Interactsh(self, *args, **kwargs) @@ -103,6 +142,38 @@ def config(self): def scan(self): return self.preset.scan + @property + def loop(self): + """ + Get the current event loop + """ + if self._loop is None: + self._loop = asyncio.get_running_loop() + return self._loop + + def run_in_executor(self, callback, *args, **kwargs): + """ + Run a synchronous task in the event loop's default thread pool executor + + Examples: + Execute callback: + >>> result = await self.helpers.run_in_executor(callback_fn, arg1, arg2) + """ + callback = partial(callback, **kwargs) + return self.loop.run_in_executor(None, callback, *args) + + def run_in_executor_mp(self, callback, *args, **kwargs): + """ + Same as run_in_executor() except with a process pool executor + Use only in cases where callback is CPU-bound + + Examples: + Execute callback: + >>> result = await self.helpers.run_in_executor_mp(callback_fn, arg1, arg2) + """ + callback = partial(callback, **kwargs) + return self.loop.run_in_executor(self.process_pool, callback, *args) + @property def in_tests(self): return os.environ.get("BBOT_TESTING", "") == "True" diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py index 6e80801a67..f5fb78f4ca 100644 --- a/bbot/core/helpers/regexes.py +++ b/bbot/core/helpers/regexes.py @@ -1,4 +1,4 @@ -import re +import regex as re from collections import OrderedDict # for extracting words from strings diff --git a/bbot/modules/badsecrets.py b/bbot/modules/badsecrets.py index 5626314fea..01cc36ed81 100644 --- a/bbot/modules/badsecrets.py +++ b/bbot/modules/badsecrets.py @@ -33,7 +33,7 @@ async def handle_event(self, event): resp_cookies[c2[0]] = c2[1] if resp_body or resp_cookies: try: - r_list = await self.scan.run_in_executor_mp( + r_list = await self.helpers.run_in_executor_mp( carve_all_modules, body=resp_body, headers=resp_headers, diff --git 
a/bbot/modules/bevigil.py b/bbot/modules/bevigil.py index 435ceae08f..bbf339b080 100644 --- a/bbot/modules/bevigil.py +++ b/bbot/modules/bevigil.py @@ -34,7 +34,7 @@ async def handle_event(self, event): if self.urls: urls = await self.query(query, request_fn=self.request_urls, parse_fn=self.parse_urls) if urls: - for parsed_url in await self.scan.run_in_executor_mp(self.helpers.validators.collapse_urls, urls): + for parsed_url in await self.helpers.run_in_executor_mp(self.helpers.validators.collapse_urls, urls): await self.emit_event(parsed_url.geturl(), "URL_UNVERIFIED", source=event) async def request_subdomains(self, query): diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 1af70b051f..199a72b5ef 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -1,7 +1,7 @@ -import re import html import base64 import jwt as j +import regex as re from urllib.parse import urljoin from bbot.core.helpers.regexes import _email_regex, dns_name_regex @@ -14,6 +14,7 @@ class BaseExtractor: def __init__(self, excavate): self.excavate = excavate + self.helpers = excavate.helpers self.compiled_regexes = {} for rname, r in self.regexes.items(): self.compiled_regexes[rname] = re.compile(r) @@ -29,7 +30,7 @@ async def _search(self, content, event, **kwargs): for name, regex in self.compiled_regexes.items(): # yield to event loop await self.excavate.helpers.sleep(0) - for result in regex.findall(content): + for result in await self.helpers.re_findall(regex, content): yield result, name async def report(self, result, name, event): @@ -39,14 +40,14 @@ async def report(self, result, name, event): class CSPExtractor(BaseExtractor): regexes = {"CSP": r"(?i)(?m)Content-Security-Policy:.+$"} - def extract_domains(self, csp): - domains = dns_name_regex.findall(csp) + async def extract_domains(self, csp): + domains = await self.helpers.re_findall(dns_name_regex, csp) unique_domains = set(domains) return unique_domains async 
def search(self, content, event, **kwargs): async for csp, name in self._search(content, event, **kwargs): - extracted_domains = self.extract_domains(csp) + extracted_domains = await self.extract_domains(csp) for domain in extracted_domains: await self.report(domain, event, **kwargs) @@ -125,7 +126,7 @@ async def _search(self, content, event, **kwargs): for name, regex in self.compiled_regexes.items(): # yield to event loop await self.excavate.helpers.sleep(0) - for result in regex.findall(content): + for result in await self.helpers.re_findall(regex, content): if name.startswith("full"): protocol, other = result result = f"{protocol}://{other}" diff --git a/bbot/modules/secretsdb.py b/bbot/modules/secretsdb.py index d9462ae195..d94a3b0a20 100644 --- a/bbot/modules/secretsdb.py +++ b/bbot/modules/secretsdb.py @@ -46,7 +46,7 @@ async def setup(self): async def handle_event(self, event): resp_body = event.data.get("body", "") resp_headers = event.data.get("raw_header", "") - all_matches = await self.scan.run_in_executor(self.search_data, resp_body, resp_headers) + all_matches = await self.helpers.run_in_executor(self.search_data, resp_body, resp_headers) for matches, name in all_matches: matches = [m.string[m.start() : m.end()] for m in matches] description = f"Possible secret ({name}): {matches}" diff --git a/bbot/modules/sslcert.py b/bbot/modules/sslcert.py index c6fec1ea97..42f34d23e3 100644 --- a/bbot/modules/sslcert.py +++ b/bbot/modules/sslcert.py @@ -119,7 +119,7 @@ async def visit_host(self, host, port): # Connect to the host try: transport, _ = await asyncio.wait_for( - self.scan._loop.create_connection(lambda: asyncio.Protocol(), host, port, ssl=ssl_context), + self.helpers.loop.create_connection(lambda: asyncio.Protocol(), host, port, ssl=ssl_context), timeout=self.timeout, ) except asyncio.TimeoutError: diff --git a/bbot/modules/wafw00f.py b/bbot/modules/wafw00f.py index 8fd0bc3d4f..b8786e494e 100644 --- a/bbot/modules/wafw00f.py +++ 
b/bbot/modules/wafw00f.py @@ -34,14 +34,14 @@ async def filter_event(self, event): async def handle_event(self, event): url = f"{event.parsed.scheme}://{event.parsed.netloc}/" - WW = await self.scan.run_in_executor(wafw00f_main.WAFW00F, url, followredirect=False) - waf_detections = await self.scan.run_in_executor(WW.identwaf) + WW = await self.helpers.run_in_executor(wafw00f_main.WAFW00F, url, followredirect=False) + waf_detections = await self.helpers.run_in_executor(WW.identwaf) if waf_detections: for waf in waf_detections: await self.emit_event({"host": str(event.host), "url": url, "WAF": waf}, "WAF", source=event) else: if self.config.get("generic_detect") == True: - generic = await self.scan.run_in_executor(WW.genericdetect) + generic = await self.helpers.run_in_executor(WW.genericdetect) if generic: await self.emit_event( { diff --git a/bbot/modules/wappalyzer.py b/bbot/modules/wappalyzer.py index 00e18f4290..24fa54bcfa 100644 --- a/bbot/modules/wappalyzer.py +++ b/bbot/modules/wappalyzer.py @@ -23,11 +23,11 @@ class wappalyzer(BaseModule): _max_event_handlers = 5 async def setup(self): - self.wappalyzer = await self.scan.run_in_executor(Wappalyzer.latest) + self.wappalyzer = await self.helpers.run_in_executor(Wappalyzer.latest) return True async def handle_event(self, event): - for res in await self.scan.run_in_executor(self.wappalyze, event.data): + for res in await self.helpers.run_in_executor(self.wappalyze, event.data): await self.emit_event( {"technology": res.lower(), "url": event.data["url"], "host": str(event.host)}, "TECHNOLOGY", event ) diff --git a/bbot/modules/wayback.py b/bbot/modules/wayback.py index 92dc78db5c..526e0b3eb6 100644 --- a/bbot/modules/wayback.py +++ b/bbot/modules/wayback.py @@ -56,7 +56,7 @@ async def query(self, query): dns_names = set() collapsed_urls = 0 start_time = datetime.now() - parsed_urls = await self.scan.run_in_executor_mp( + parsed_urls = await self.helpers.run_in_executor_mp( self.helpers.validators.collapse_urls, 
urls, threshold=self.garbage_threshold, diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 36428cc533..c231b9a3b9 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,11 +6,8 @@ import contextlib from pathlib import Path from sys import exc_info -import multiprocessing as mp from datetime import datetime -from functools import partial from collections import OrderedDict -from concurrent.futures import ProcessPoolExecutor from bbot import __version__ @@ -207,16 +204,6 @@ def __init__( self.ticker_task = None self.dispatcher_tasks = [] - # multiprocessing thread pool - start_method = mp.get_start_method() - if start_method != "spawn": - self.warning(f"Multiprocessing spawn method is set to {start_method}.") - - # we spawn 1 fewer processes than cores - # this helps to avoid locking up the system or competing with the main python process for cpu time - num_processes = max(1, mp.cpu_count() - 1) - self.process_pool = ProcessPoolExecutor(max_workers=num_processes) - self._stopping = False self._dns_regexes = None @@ -758,7 +745,7 @@ def _cancel_tasks(self): tasks += self._manager_worker_loop_tasks self.helpers.cancel_tasks_sync(tasks) # process pool - self.process_pool.shutdown(cancel_futures=True) + self.helpers.process_pool.shutdown(cancel_futures=True) async def _report(self): """Asynchronously executes the `report()` method for each module in the scan. 
@@ -918,29 +905,6 @@ def root_event(self): root_event.module = self._make_dummy_module(name="TARGET", _type="TARGET") return root_event - def run_in_executor(self, callback, *args, **kwargs): - """ - Run a synchronous task in the event loop's default thread pool executor - - Examples: - Execute callback: - >>> result = await self.scan.run_in_executor(callback_fn, arg1, arg2) - """ - callback = partial(callback, **kwargs) - return self._loop.run_in_executor(None, callback, *args) - - def run_in_executor_mp(self, callback, *args, **kwargs): - """ - Same as run_in_executor() except with a process pool executor - Use only in cases where callback is CPU-bound - - Examples: - Execute callback: - >>> result = await self.scan.run_in_executor_mp(callback_fn, arg1, arg2) - """ - callback = partial(callback, **kwargs) - return self._loop.run_in_executor(self.process_pool, callback, *args) - @property def dns_regexes(self): """ @@ -1109,12 +1073,6 @@ def _fail_setup(self, msg): msg += " (--force to run module anyway)" raise ScanError(msg) - @property - def _loop(self): - if self.__loop is None: - self.__loop = asyncio.get_event_loop() - return self.__loop - def _load_modules(self, modules): modules = [str(m) for m in modules] loaded_modules = {} diff --git a/poetry.lock b/poetry.lock index 0a89ff3bcf..74d992ebcf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2613,4 +2613,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "daf56ec78cff336b530e48e83f0a854c3356a802e5e0cf9456b7a5adfe962354" +content-hash = "9665625c52f491373ac3f4306cacd3626e9c4dbc6ee79123c387693e2aa74ac7" diff --git a/pyproject.toml b/pyproject.toml index 4058cacb73..21ff710c00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ cachetools = "^5.3.2" socksio = "^1.0.0" jinja2 = "^3.1.3" pyzmq = "^25.1.2" +regex = "^2024.4.16" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 
71e0c23b933f8fa038ff3bae33ed1c18f25c0997 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 17 Apr 2024 17:23:13 -0400 Subject: [PATCH 46/63] remove debug statement --- bbot/modules/internal/dns.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index baaefc9144..ea5e4efcf1 100644 --- a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -179,7 +179,6 @@ async def handle_event(self, event, kwargs): source_event = self.scan.make_event(event.host, "DNS_NAME", module=self.host_module, source=event) # only emit the event if it's not already in the parent chain if source_event is not None and (source_event.always_emit or source_event not in event.get_sources()): - self.critical(f"SPECULATING {event.host} FROM {event}") source_event.scope_distance = event.scope_distance if "target" in event.tags: source_event.add_tag("target") From 8717ea49f604829f14c929828625d7f131659979 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 19 Apr 2024 01:42:13 -0400 Subject: [PATCH 47/63] more WIP regex optimizations --- bbot/core/helpers/helper.py | 25 +---------- bbot/core/helpers/misc.py | 2 +- bbot/core/helpers/regex.py | 65 +++++++++++++++++++++++++++ bbot/core/helpers/regexes.py | 4 ++ bbot/modules/ajaxpro.py | 4 +- bbot/modules/azure_tenant.py | 6 +-- bbot/modules/dehashed.py | 2 +- bbot/modules/emailformat.py | 2 +- bbot/modules/hunt.py | 3 +- bbot/modules/internal/cloud.py | 2 +- bbot/modules/internal/excavate.py | 10 ++--- bbot/modules/massdns.py | 2 +- bbot/modules/oauth.py | 2 +- bbot/modules/paramminer_headers.py | 8 ++-- bbot/modules/pgp.py | 2 +- bbot/modules/report/asn.py | 2 +- bbot/modules/sitedossier.py | 11 +++-- bbot/modules/skymem.py | 12 +++-- bbot/scanner/scanner.py | 2 +- bbot/test/test_step_1/test_helpers.py | 22 +++++---- 20 files changed, 122 insertions(+), 66 deletions(-) create mode 100644 bbot/core/helpers/regex.py diff --git a/bbot/core/helpers/helper.py 
b/bbot/core/helpers/helper.py index ecf7c7cfc6..16afc05cd2 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -1,7 +1,6 @@ import os import asyncio import logging -import regex as re from pathlib import Path import multiprocessing as mp from functools import partial @@ -12,6 +11,7 @@ from .dns import DNSHelper from .web import WebHelper from .diff import HttpCompare +from .regex import RegexHelper from .wordcloud import WordCloud from .interactsh import Interactsh from ...scanner.target import Target @@ -84,34 +84,13 @@ def __init__(self, preset): self.cloud = cloud_providers + self.re = RegexHelper(self) self.dns = DNSHelper(self) self.web = WebHelper(self) self.depsinstaller = DepsInstaller(self) self.word_cloud = WordCloud(self) self.dummy_modules = {} - def ensure_compiled_regex(self, r): - """ - Make sure a regex has been compiled - """ - if not isinstance(r, re.Pattern): - raise ValueError("Regex must be compiled first!") - - async def re_search(self, compiled_regex, *args, **kwargs): - self.ensure_compiled_regex(compiled_regex) - return await self.run_in_executor(compiled_regex.search, *args, **kwargs) - - async def re_findall(self, compiled_regex, *args, **kwargs): - self.ensure_compiled_regex(compiled_regex) - return await self.run_in_executor(compiled_regex.findall, *args, **kwargs) - - async def re_finditer(self, compiled_regex, *args, **kwargs): - self.ensure_compiled_regex(compiled_regex) - return await self.run_in_executor(self._re_finditer, compiled_regex, *args, **kwargs) - - def _re_finditer(self, compiled_regex, *args, **kwargs): - return list(compiled_regex.finditer(*args, **kwargs)) - def interactsh(self, *args, **kwargs): return Interactsh(self, *args, **kwargs) diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 7956466eb4..27a3718edc 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -1,5 +1,4 @@ import os -import re import sys import json import random @@ -7,6 +6,7 @@ 
import asyncio import logging import ipaddress +import regex as re import subprocess as sp from pathlib import Path from contextlib import suppress diff --git a/bbot/core/helpers/regex.py b/bbot/core/helpers/regex.py new file mode 100644 index 0000000000..99116a5c84 --- /dev/null +++ b/bbot/core/helpers/regex.py @@ -0,0 +1,65 @@ +import regex as re +from . import misc + + +class RegexHelper: + """ + Class for misc CPU-intensive regex operations + + Offloads regex processing to other CPU cores via GIL release + thread pool + """ + + def __init__(self, parent_helper): + self.parent_helper = parent_helper + + def ensure_compiled_regex(self, r): + """ + Make sure a regex has been compiled + """ + if not isinstance(r, re.Pattern): + raise ValueError("Regex must be compiled first!") + + def compile(self, *args, **kwargs): + return re.compile(*args, **kwargs) + + async def search(self, compiled_regex, *args, **kwargs): + self.ensure_compiled_regex(compiled_regex) + return await self.parent_helper.run_in_executor(compiled_regex.search, *args, **kwargs) + + async def findall(self, compiled_regex, *args, **kwargs): + self.ensure_compiled_regex(compiled_regex) + return await self.parent_helper.run_in_executor(compiled_regex.findall, *args, **kwargs) + + async def finditer(self, compiled_regex, *args, **kwargs): + self.ensure_compiled_regex(compiled_regex) + return await self.parent_helper.run_in_executor(self._finditer, compiled_regex, *args, **kwargs) + + async def finditer_multi(self, compiled_regexes, *args, **kwargs): + for r in compiled_regexes: + self.ensure_compiled_regex(r) + return await self.parent_helper.run_in_executor(self._finditer_multi, compiled_regexes, *args, **kwargs) + + def _finditer_multi(self, compiled_regexes, *args, **kwargs): + matches = [] + for r in compiled_regexes: + for m in r.finditer(*args, **kwargs): + matches.append(m) + return matches + + def _finditer(self, compiled_regex, *args, **kwargs): + return list(compiled_regex.finditer(*args, 
**kwargs)) + + async def extract_params_html(self, *args, **kwargs): + return await self.parent_helper.run_in_executor(misc.extract_params_html, *args, **kwargs) + + async def extract_emails(self, *args, **kwargs): + return await self.parent_helper.run_in_executor(misc.extract_emails, *args, **kwargs) + + async def search_dict_values(self, *args, **kwargs): + def _search_dict_values(*_args, **_kwargs): + return list(misc.search_dict_values(*_args, **_kwargs)) + + return await self.parent_helper.run_in_executor(_search_dict_values, *args, **kwargs) + + async def recursive_decode(self, *args, **kwargs): + return await self.parent_helper.run_in_executor(misc.recursive_decode, *args, **kwargs) diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py index f5fb78f4ca..4e2ada0c24 100644 --- a/bbot/core/helpers/regexes.py +++ b/bbot/core/helpers/regexes.py @@ -104,3 +104,7 @@ _extract_host_regex = r"(?:[a-z0-9]{1,20}://)?(?:[^?]*@)?(" + valid_netloc + ")" extract_host_regex = re.compile(_extract_host_regex, re.I) + +# for use in recursive_decode() +encoded_regex = re.compile(r"%[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\[ntrbv]") +backslash_regex = re.compile(r"(?P<slashes>\\+)(?P<char>[ntrvb])") diff --git a/bbot/modules/ajaxpro.py b/bbot/modules/ajaxpro.py index 46d475cca5..ba3e0eb3ed 100644 --- a/bbot/modules/ajaxpro.py +++ b/bbot/modules/ajaxpro.py @@ -1,4 +1,4 @@ -import re +import regex as re from bbot.modules.base import BaseModule @@ -38,7 +38,7 @@ async def handle_event(self, event): elif event.type == "HTTP_RESPONSE": resp_body = event.data.get("body", None) if resp_body: - ajaxpro_regex_result = self.ajaxpro_regex.search(resp_body) + ajaxpro_regex_result = await self.helpers.re.search(self.ajaxpro_regex, resp_body) if ajaxpro_regex_result: ajax_pro_path = ajaxpro_regex_result.group(0) await self.emit_event( diff --git a/bbot/modules/azure_tenant.py b/bbot/modules/azure_tenant.py index 909acbe205..a15bbb68ff 100644 --- a/bbot/modules/azure_tenant.py
+++ b/bbot/modules/azure_tenant.py @@ -1,4 +1,4 @@ -import re +import regex as re from contextlib import suppress from bbot.modules.base import BaseModule @@ -25,7 +25,7 @@ async def handle_event(self, event): tenant_id = None authorization_endpoint = openid_config.get("authorization_endpoint", "") - matches = self.helpers.regexes.uuid_regex.findall(authorization_endpoint) + matches = await self.helpers.re.findall(self.helpers.regexes.uuid_regex, authorization_endpoint) if matches: tenant_id = matches[0] @@ -86,7 +86,7 @@ async def query(self, domain): if status_code not in (200, 421): self.verbose(f'Error retrieving azure_tenant domains for "{domain}" (status code: {status_code})') return set(), dict() - found_domains = list(set(self.d_xml_regex.findall(r.text))) + found_domains = list(set(await self.helpers.re.findall(self.d_xml_regex, r.text))) domains = set() for d in found_domains: diff --git a/bbot/modules/dehashed.py b/bbot/modules/dehashed.py index c1a35c4195..caa5fb662d 100644 --- a/bbot/modules/dehashed.py +++ b/bbot/modules/dehashed.py @@ -33,7 +33,7 @@ async def handle_event(self, event): for entry in entries: # we have to clean up the email field because dehashed does a poor job of it email_str = entry.get("email", "").replace("\\", "") - found_emails = list(self.helpers.extract_emails(email_str)) + found_emails = list(await self.helpers.re.extract_emails(email_str)) if not found_emails: self.debug(f"Invalid email from dehashed.com: {email_str}") continue diff --git a/bbot/modules/emailformat.py b/bbot/modules/emailformat.py index 000c3d5cf3..31cff14689 100644 --- a/bbot/modules/emailformat.py +++ b/bbot/modules/emailformat.py @@ -17,6 +17,6 @@ async def handle_event(self, event): r = await self.request_with_fail_count(url) if not r: return - for email in self.helpers.extract_emails(r.text): + for email in await self.helpers.re.extract_emails(r.text): if email.endswith(query): await self.emit_event(email, "EMAIL_ADDRESS", source=event) diff --git 
a/bbot/modules/hunt.py b/bbot/modules/hunt.py index add45b665d..dd591a345b 100644 --- a/bbot/modules/hunt.py +++ b/bbot/modules/hunt.py @@ -1,7 +1,6 @@ # adapted from https://github.com/bugcrowd/HUNT from bbot.modules.base import BaseModule -from bbot.core.helpers.misc import extract_params_html hunt_param_dict = { "Command Injection": [ @@ -281,7 +280,7 @@ class hunt(BaseModule): async def handle_event(self, event): body = event.data.get("body", "") - for p in extract_params_html(body): + for p in await self.helpers.extract_params_html(body): for k in hunt_param_dict.keys(): if p.lower() in hunt_param_dict[k]: description = f"Found potential {k.upper()} parameter [{p}]" diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index 6bfceacffb..14aaba930a 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -43,7 +43,7 @@ async def handle_event(self, event, kwargs): for sig in sigs: matches = [] if event.type == "HTTP_RESPONSE": - matches = sig.findall(event.data.get("body", "")) + matches = await self.helpers.re.findall(sig, event.data.get("body", "")) elif event.type.startswith("DNS_NAME"): for host in hosts_to_check: match = sig.match(host) diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 199a72b5ef..6b819c0d20 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -30,7 +30,7 @@ async def _search(self, content, event, **kwargs): for name, regex in self.compiled_regexes.items(): # yield to event loop await self.excavate.helpers.sleep(0) - for result in await self.helpers.re_findall(regex, content): + for result in await self.helpers.re.findall(regex, content): yield result, name async def report(self, result, name, event): @@ -41,7 +41,7 @@ class CSPExtractor(BaseExtractor): regexes = {"CSP": r"(?i)(?m)Content-Security-Policy:.+$"} async def extract_domains(self, csp): - domains = await self.helpers.re_findall(dns_name_regex, csp) + 
domains = await self.helpers.re.findall(dns_name_regex, csp) unique_domains = set(domains) return unique_domains @@ -126,7 +126,7 @@ async def _search(self, content, event, **kwargs): for name, regex in self.compiled_regexes.items(): # yield to event loop await self.excavate.helpers.sleep(0) - for result in await self.helpers.re_findall(regex, content): + for result in await self.helpers.re.findall(regex, content): if name.startswith("full"): protocol, other = result result = f"{protocol}://{other}" @@ -387,7 +387,7 @@ async def handle_event(self, event): else: self.verbose(f"Exceeded max HTTP redirects ({self.max_redirects}): {location}") - body = self.helpers.recursive_decode(event.data.get("body", "")) + body = await self.helpers.re.recursive_decode(event.data.get("body", "")) await self.search( body, @@ -405,7 +405,7 @@ async def handle_event(self, event): consider_spider_danger=True, ) - headers = self.helpers.recursive_decode(event.data.get("raw_header", "")) + headers = await self.helpers.re.recursive_decode(event.data.get("raw_header", "")) await self.search( headers, [self.hostname, self.url, self.email, self.error_extractor, self.jwt, self.serialization, self.csp], diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index 540b840418..ffacb8c644 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -1,7 +1,7 @@ -import re import json import random import subprocess +import regex as re from bbot.modules.templates.subdomain_enum import subdomain_enum diff --git a/bbot/modules/oauth.py b/bbot/modules/oauth.py index 7e376000e6..fd6188acdd 100644 --- a/bbot/modules/oauth.py +++ b/bbot/modules/oauth.py @@ -119,7 +119,7 @@ async def getoidc(self, url): return url, token_endpoint, results if json and isinstance(json, dict): token_endpoint = json.get("token_endpoint", "") - for found in self.helpers.search_dict_values(json, *self.regexes): + for found in await self.helpers.re.search_dict_values(json, *self.regexes): results.add(found) 
results -= {token_endpoint} return url, token_endpoint, results diff --git a/bbot/modules/paramminer_headers.py b/bbot/modules/paramminer_headers.py index 7d2174bbee..35b5c0df11 100644 --- a/bbot/modules/paramminer_headers.py +++ b/bbot/modules/paramminer_headers.py @@ -1,6 +1,6 @@ from bbot.errors import HttpCompareError from bbot.modules.base import BaseModule -from bbot.core.helpers.misc import extract_params_json, extract_params_xml, extract_params_html +from bbot.core.helpers.misc import extract_params_json, extract_params_xml class paramminer_headers(BaseModule): @@ -158,7 +158,7 @@ async def handle_event(self, event): wl = set(self.wl) if self.config.get("http_extract"): - extracted_words = self.load_extracted_words(event.data.get("body"), event.data.get("content_type")) + extracted_words = await self.load_extracted_words(event.data.get("body"), event.data.get("content_type")) if extracted_words: self.debug(f"Extracted {str(len(extracted_words))} words from {url}") self.extracted_words_master.update(extracted_words - wl) @@ -195,7 +195,7 @@ def gen_count_args(self, url): yield header_count, (url,), {"headers": fake_headers} header_count -= 5 - def load_extracted_words(self, body, content_type): + async def load_extracted_words(self, body, content_type): if not body: return None if content_type and "json" in content_type.lower(): @@ -203,7 +203,7 @@ def load_extracted_words(self, body, content_type): elif content_type and "xml" in content_type.lower(): return extract_params_xml(body) else: - return set(extract_params_html(body)) + return set(await self.helpers.extract_params_html(body)) async def binary_search(self, compare_helper, url, group, reasons=None, reflection=False): if reasons is None: diff --git a/bbot/modules/pgp.py b/bbot/modules/pgp.py index 2c378f5853..78becbf0e8 100644 --- a/bbot/modules/pgp.py +++ b/bbot/modules/pgp.py @@ -28,7 +28,7 @@ async def query(self, query): url = url.replace("<query>", self.helpers.quote(query)) response = await
self.helpers.request(url) if response is not None: - for email in self.helpers.extract_emails(response.text): + for email in await self.helpers.re.extract_emails(response.text): email = email.lower() if email.endswith(query): results.add(email) diff --git a/bbot/modules/report/asn.py b/bbot/modules/report/asn.py index f906c785e6..982c765843 100644 --- a/bbot/modules/report/asn.py +++ b/bbot/modules/report/asn.py @@ -149,7 +149,7 @@ async def get_asn_metadata_ripe(self, asn_number): for item in record: key = item.get("key", "") value = item.get("value", "") - for email in self.helpers.extract_emails(value): + for email in await self.helpers.re.extract_emails(value): emails.add(email.lower()) if not key: continue diff --git a/bbot/modules/sitedossier.py b/bbot/modules/sitedossier.py index 86872c0523..e6571ea85e 100644 --- a/bbot/modules/sitedossier.py +++ b/bbot/modules/sitedossier.py @@ -36,12 +36,11 @@ async def query(self, query, parse_fn=None, request_fn=None): if response.status_code == 302: self.verbose("Hit rate limit captcha") break - for regex in self.scan.dns_regexes: - for match in regex.finditer(response.text): - hostname = match.group().lower() - if hostname and hostname not in results: - results.add(hostname) - yield hostname + for match in await self.helpers.re.finditer_multi(self.scan.dns_regexes, response.text): + hostname = match.group().lower() + if hostname and hostname not in results: + results.add(hostname) + yield hostname if ' Date: Fri, 19 Apr 2024 09:30:01 -0400 Subject: [PATCH 48/63] steady work on regexes --- bbot/core/helpers/regex.py | 7 +++++++ bbot/test/test_step_1/test_helpers.py | 18 +++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/bbot/core/helpers/regex.py b/bbot/core/helpers/regex.py index 99116a5c84..f85fb72a50 100644 --- a/bbot/core/helpers/regex.py +++ b/bbot/core/helpers/regex.py @@ -7,6 +7,10 @@ class RegexHelper: Class for misc CPU-intensive regex operations Offloads regex processing to other 
CPU cores via GIL release + thread pool + + For quick, one-off regexes, you don't need to use this helper. + Only use this helper if you're searching large bodies of text + or if your regex is CPU-intensive """ def __init__(self, parent_helper): @@ -35,6 +39,9 @@ async def finditer(self, compiled_regex, *args, **kwargs): return await self.parent_helper.run_in_executor(self._finditer, compiled_regex, *args, **kwargs) async def finditer_multi(self, compiled_regexes, *args, **kwargs): + """ + Same as finditer() but with multiple regexes + """ for r in compiled_regexes: self.ensure_compiled_regex(r) return await self.parent_helper.run_in_executor(self._finditer_multi, compiled_regexes, *args, **kwargs) diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index 5d1350a0bf..0ce3e0c761 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -360,25 +360,29 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver): assert helpers.smart_encode_punycode("ドメイン.テスト:80") == "xn--eckwd4c7c.xn--zckzah:80" assert helpers.smart_decode_punycode("xn--eckwd4c7c.xn--zckzah:80") == "ドメイン.テスト:80" - assert await helpers.recursive_decode("Hello%20world%21") == "Hello world!" - assert await helpers.recursive_decode("Hello%20%5Cu041f%5Cu0440%5Cu0438%5Cu0432%5Cu0435%5Cu0442") == "Hello Привет" + assert await helpers.re.recursive_decode("Hello%20world%21") == "Hello world!" assert ( - await helpers.recursive_decode("%5Cu0020%5Cu041f%5Cu0440%5Cu0438%5Cu0432%5Cu0435%5Cu0442%5Cu0021") + await helpers.re.recursive_decode("Hello%20%5Cu041f%5Cu0440%5Cu0438%5Cu0432%5Cu0435%5Cu0442") == "Hello Привет" + ) + assert ( + await helpers.re.recursive_decode("%5Cu0020%5Cu041f%5Cu0440%5Cu0438%5Cu0432%5Cu0435%5Cu0442%5Cu0021") == " Привет!" ) - assert await helpers.recursive_decode("Hello%2520world%2521") == "Hello world!" + assert await helpers.re.recursive_decode("Hello%2520world%2521") == "Hello world!" 
assert ( - await helpers.recursive_decode("Hello%255Cu0020%255Cu041f%255Cu0440%255Cu0438%255Cu0432%255Cu0435%255Cu0442") + await helpers.re.recursive_decode( + "Hello%255Cu0020%255Cu041f%255Cu0440%255Cu0438%255Cu0432%255Cu0435%255Cu0442" + ) == "Hello Привет" ) assert ( - await helpers.recursive_decode( + await helpers.re.recursive_decode( "%255Cu0020%255Cu041f%255Cu0440%255Cu0438%255Cu0432%255Cu0435%255Cu0442%255Cu0021" ) == " Привет!" ) assert ( - await helpers.recursive_decode(r"Hello\\nWorld\\\tGreetings\\\\nMore\nText") + await helpers.re.recursive_decode(r"Hello\\nWorld\\\tGreetings\\\\nMore\nText") == "Hello\nWorld\tGreetings\nMore\nText" ) From 7899408a2a83e264b51dc1ec1c780b6b163f5274 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 19 Apr 2024 11:29:44 -0400 Subject: [PATCH 49/63] don't start engine until necessary --- bbot/core/engine.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/bbot/core/engine.py b/bbot/core/engine.py index 06f965c456..c72eecbb32 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -8,7 +8,7 @@ import traceback import zmq.asyncio from pathlib import Path -from contextlib import contextmanager +from contextlib import asynccontextmanager, suppress from bbot.core import CORE from bbot.core.helpers.misc import rand_string @@ -31,12 +31,12 @@ def __init__(self, **kwargs): self.socket_address = f"zmq_{rand_string(8)}.sock" self.socket_path = Path(tempfile.gettempdir()) / self.socket_address self.server_kwargs = kwargs.pop("server_kwargs", {}) - self.server_process = self.start_server() + self._server_process = None self.context = zmq.asyncio.Context() atexit.register(self.cleanup) async def run_and_return(self, command, **kwargs): - with self.new_socket() as socket: + async with self.new_socket() as socket: message = self.make_message(command, args=kwargs) await socket.send(message) binary = await socket.recv() @@ -50,7 +50,7 @@ async def run_and_return(self, 
command, **kwargs): async def run_and_yield(self, command, **kwargs): message = self.make_message(command, args=kwargs) - with self.new_socket() as socket: + async with self.new_socket() as socket: await socket.send(message) while 1: binary = await socket.recv() @@ -86,7 +86,7 @@ def make_message(self, command, args): def available_commands(self): return [s for s in self.CMDS if isinstance(s, str)] - def start_server(self, **server_kwargs): + def start_server(self): process = CORE.create_process( target=self.server_process, args=( @@ -100,17 +100,30 @@ def start_server(self, **server_kwargs): @staticmethod def server_process(server_class, socket_path, **kwargs): - engine_server = server_class(socket_path, **kwargs) - asyncio.run(engine_server.worker()) - - @contextmanager - def new_socket(self): + try: + engine_server = server_class(socket_path, **kwargs) + asyncio.run(engine_server.worker()) + except (asyncio.CancelledError, KeyboardInterrupt): + pass + except Exception: + import traceback + + log = logging.getLogger("bbot.core.engine.server") + log.critical(f"Unhandled error in {server_class.__name__} server process: {traceback.format_exc()}") + + @asynccontextmanager + async def new_socket(self): + if self._server_process is None: + self._server_process = self.start_server() + while not self.socket_path.exists(): + await asyncio.sleep(0.1) socket = self.context.socket(zmq.DEALER) socket.connect(f"ipc://{self.socket_path}") try: yield socket finally: - socket.close() + with suppress(Exception): + socket.close() def cleanup(self): # delete socket file on exit @@ -158,7 +171,6 @@ async def worker(self): try: while 1: client_id, binary = await self.socket.recv_multipart() - # self.log.debug(f"{self.name} got binary: {binary}") message = pickle.loads(binary) self.log.debug(f"{self.name} got message: {message}") @@ -189,4 +201,5 @@ async def worker(self): self.log.error(f"Error in EngineServer worker: {e}") self.log.trace(traceback.format_exc()) finally: - 
self.socket.close() + with suppress(Exception): + self.socket.close() From 0f6240c3e2e2df625cc64621c986b48d9a1f29e0 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 19 Apr 2024 13:47:07 -0400 Subject: [PATCH 50/63] update cloudcheck --- bbot/modules/internal/cloud.py | 3 ++- bbot/test/test_step_2/module_tests/test_module_cloud.py | 2 ++ poetry.lock | 9 +++++---- pyproject.toml | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index 14aaba930a..e6bab4baa5 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -9,8 +9,9 @@ class cloud(InterceptModule): async def setup(self): self.dummy_modules = {} - for provider_name in self.helpers.cloud.providers: + for provider_name, provider in self.helpers.cloud.providers.items(): self.dummy_modules[provider_name] = self.scan._make_dummy_module(f"cloud_{provider_name}", _type="scan") + return True async def filter_event(self, event): diff --git a/bbot/test/test_step_2/module_tests/test_module_cloud.py b/bbot/test/test_step_2/module_tests/test_module_cloud.py index 1d4e59283a..1ee8df5e7c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_cloud.py +++ b/bbot/test/test_step_2/module_tests/test_module_cloud.py @@ -70,6 +70,8 @@ async def setup_after_prep(self, module_test): assert "cloud-storage-bucket" in google_event3.tags def check(self, module_test, events): + for e in events: + self.log.debug(e) assert 2 == len([e for e in events if e.type == "STORAGE_BUCKET"]) assert 1 == len( [ diff --git a/poetry.lock b/poetry.lock index ce4a80b8fc..05386f0a43 100644 --- a/poetry.lock +++ b/poetry.lock @@ -388,18 +388,19 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "cloudcheck" -version = "3.1.0.318" +version = "4.0.0.345" description = "Check whether an IP address belongs to a cloud provider" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = 
"cloudcheck-3.1.0.318-py3-none-any.whl", hash = "sha256:471dba97531e1f60aadab8daa6cb1d63727f67c16fd7b4758db46c9af2f362f1"}, - {file = "cloudcheck-3.1.0.318.tar.gz", hash = "sha256:ba7fcc026817aa05f74c7789d2ac306469f3143f91b3ea9f95c57c70a7b0b787"}, + {file = "cloudcheck-4.0.0.345-py3-none-any.whl", hash = "sha256:82a1cecaa0ec35a50d6c1e4884a9535eb4c1c788b845b0c4a91b44935f4dc765"}, + {file = "cloudcheck-4.0.0.345.tar.gz", hash = "sha256:787953a305c0be6e6eb4ceb9990dccb633f9e1429d5ebfda7acf7dca35b3caeb"}, ] [package.dependencies] httpx = ">=0.26,<0.28" pydantic = ">=2.4.2,<3.0.0" +regex = ">=2024.4.16,<2025.0.0" [[package]] name = "colorama" @@ -2624,4 +2625,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "6e455b3aa900eff3b432ee7f9f92ca1a13193eccbd7c75bee99cb4d12891dfdd" +content-hash = "15633b02fcedb3d044f4e40a45ce1e9dd7209608a0389175a4523e3810a8504b" diff --git a/pyproject.toml b/pyproject.toml index 90bf19d47b..0cc6eed310 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,6 @@ lxml = ">=4.9.2,<6.0.0" dnspython = "^2.4.2" pydantic = "^2.4.2" httpx = "^0.26.0" -cloudcheck = ">=2.1.0.181,<4.0.0.0" tldextract = "^5.1.1" cachetools = "^5.3.2" socksio = "^1.0.0" @@ -51,6 +50,7 @@ jinja2 = "^3.1.3" pyzmq = "^25.1.2" regex = "^2024.4.16" unidecode = "^1.3.8" +cloudcheck = "^4.0.0.345" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From a6a7ade89b4ead0f38c3af6a10fd360ed981120c Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 19 Apr 2024 15:15:09 -0400 Subject: [PATCH 51/63] fix tests --- bbot/modules/hunt.py | 2 +- bbot/modules/paramminer_headers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/modules/hunt.py b/bbot/modules/hunt.py index dd591a345b..0a759f2cf2 100644 --- a/bbot/modules/hunt.py +++ b/bbot/modules/hunt.py @@ -280,7 +280,7 @@ class hunt(BaseModule): async def handle_event(self, event): body = 
event.data.get("body", "") - for p in await self.helpers.extract_params_html(body): + for p in await self.helpers.re.extract_params_html(body): for k in hunt_param_dict.keys(): if p.lower() in hunt_param_dict[k]: description = f"Found potential {k.upper()} parameter [{p}]" diff --git a/bbot/modules/paramminer_headers.py b/bbot/modules/paramminer_headers.py index 35b5c0df11..561a05fe27 100644 --- a/bbot/modules/paramminer_headers.py +++ b/bbot/modules/paramminer_headers.py @@ -203,7 +203,7 @@ async def load_extracted_words(self, body, content_type): elif content_type and "xml" in content_type.lower(): return extract_params_xml(body) else: - return set(await self.helpers.extract_params_html(body)) + return set(await self.helpers.re.extract_params_html(body)) async def binary_search(self, compare_helper, url, group, reasons=None, reflection=False): if reasons is None: From eaf2cdf6f206521470682e0fc6c8087b067c5c72 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:23:18 -0400 Subject: [PATCH 52/63] implement radixtarget --- bbot/core/event/base.py | 6 ++- bbot/core/helpers/dns/dns.py | 20 ++++---- bbot/core/helpers/misc.py | 71 ++------------------------- bbot/scanner/target.py | 61 ++++++++++++----------- bbot/test/test_step_1/test_helpers.py | 10 ---- poetry.lock | 13 ++++- pyproject.toml | 1 + 7 files changed, 65 insertions(+), 117 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index c036b96185..8c69d829d0 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -8,6 +8,7 @@ from datetime import datetime from contextlib import suppress from urllib.parse import urljoin +from radixtarget import RadixTarget from pydantic import BaseModel, field_validator from .helpers import * @@ -15,7 +16,6 @@ from bbot.core.helpers import ( extract_words, get_file_extension, - host_in_host, is_domain, is_subdomain, is_ip, @@ -580,7 +580,9 @@ def __contains__(self, other): if self.host == other.host: return True # 
hostnames and IPs - return host_in_host(other.host, self.host) + radixtarget = RadixTarget() + radixtarget.insert(self.host) + return bool(radixtarget.search(other.host)) return False def json(self, mode="json", siem_friendly=False): diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 9764687bf1..2d78d2c196 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -2,9 +2,10 @@ import logging import dns.exception import dns.asyncresolver +from radixtarget import RadixTarget from bbot.core.engine import EngineClient -from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host +from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name from .engine import DNSEngine @@ -63,10 +64,9 @@ def __init__(self, parent_helper): # wildcard handling self.wildcard_disable = self.config.get("dns_wildcard_disable", False) - self.wildcard_ignore = self.config.get("dns_wildcard_ignore", None) - if not self.wildcard_ignore: - self.wildcard_ignore = [] - self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore]) + self.wildcard_ignore = RadixTarget() + for d in self.config.get("dns_wildcard_ignore", []): + self.wildcard_ignore.insert(d) # copy the system's current resolvers to a text file for tool use self.system_resolvers = dns.resolver.Resolver().nameservers @@ -150,10 +150,12 @@ def _wildcard_prevalidation(self, host): return False # skip check if the query's parent domain is excluded in the config - for d in self.wildcard_ignore: - if host_in_host(host, d): - log.debug(f"Skipping wildcard detection on {host} because it is excluded in the config") - return False + wildcard_ignore = self.wildcard_ignore.search(host) + if wildcard_ignore: + log.debug( + f"Skipping wildcard detection on {host} because it or its parent domai ({wildcard_ignore}) is excluded in the config" + ) + return False return host diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 
d6e3238d1b..a4378069d8 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -637,7 +637,7 @@ def is_ip_type(i): >>> is_ip_type("192.168.1.0/24") False """ - return isinstance(i, ipaddress._BaseV4) or isinstance(i, ipaddress._BaseV6) + return ipaddress._IPAddressBase in i.__class__.__mro__ def make_ip_type(s): @@ -663,78 +663,17 @@ def make_ip_type(s): >>> make_ip_type("evilcorp.com") 'evilcorp.com' """ + if not s: + raise ValueError(f'Invalid hostname: "{s}"') # IP address with suppress(Exception): - return ipaddress.ip_address(str(s).strip()) + return ipaddress.ip_address(s) # IP network with suppress(Exception): - return ipaddress.ip_network(str(s).strip(), strict=False) + return ipaddress.ip_network(s, strict=False) return s -def host_in_host(host1, host2): - """ - Checks if host1 is included within host2, either as a subdomain, IP, or IP network. - Used for scope calculations/decisions within BBOT. - - Args: - host1 (str or ipaddress.IPv4Address or ipaddress.IPv6Address or ipaddress.IPv4Network or ipaddress.IPv6Network): - The host to check for inclusion within host2. - host2 (str or ipaddress.IPv4Address or ipaddress.IPv6Address or ipaddress.IPv4Network or ipaddress.IPv6Network): - The host within which to check for the inclusion of host1. - - Returns: - bool: True if host1 is included in host2, otherwise False. - - Examples: - >>> host_in_host("www.evilcorp.com", "evilcorp.com") - True - >>> host_in_host("evilcorp.com", "www.evilcorp.com") - False - >>> host_in_host(ipaddress.IPv6Address('dead::beef'), ipaddress.IPv6Network('dead::/64')) - True - >>> host_in_host(ipaddress.IPv4Address('192.168.1.1'), ipaddress.IPv4Network('10.0.0.0/8')) - False - - Notes: - - If checking an IP address/network, you MUST FIRST convert your IP into an ipaddress object (e.g. via `make_ip_type()`) before passing it to this function. - """ - - """ - Is host1 included in host2? - "www.evilcorp.com" in "evilcorp.com"? 
--> True - "evilcorp.com" in "www.evilcorp.com"? --> False - IPv6Address('dead::beef') in IPv6Network('dead::/64')? --> True - IPv4Address('192.168.1.1') in IPv4Network('10.0.0.0/8')? --> False - - Very important! Used throughout BBOT for scope calculations/decisions. - - Works with hostnames, IPs, and IP networks. - """ - - if not host1 or not host2: - return False - - # check if hosts are IP types - host1_ip_type = is_ip_type(host1) - host2_ip_type = is_ip_type(host2) - # if both hosts are IP types - if host1_ip_type and host2_ip_type: - if not host1.version == host2.version: - return False - host1_net = ipaddress.ip_network(host1) - host2_net = ipaddress.ip_network(host2) - return host1_net.subnet_of(host2_net) - - # else hostnames - elif not (host1_ip_type or host2_ip_type): - host2_len = len(host2.split(".")) - host1_truncated = ".".join(host1.split(".")[-host2_len:]) - return host1_truncated == host2 - - return False - - def sha1(data): """ Computes the SHA-1 hash of the given data. diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index a0f8130c8e..1016fd3cf8 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -1,12 +1,13 @@ import re +import copy import logging import ipaddress from contextlib import suppress +from radixtarget import RadixTarget from bbot.errors import * from bbot.modules.base import BaseModule from bbot.core.event import make_event, is_event -from bbot.core.helpers.misc import ip_network_parents, is_ip_type, domain_parents log = logging.getLogger("bbot.core.target") @@ -19,7 +20,8 @@ class Target: strict_scope (bool): Flag indicating whether to consider child domains in-scope. If set to True, only the exact hosts specified and not their children are considered part of the target. - _events (dict): Dictionary mapping hosts to events related to the target. + _radix (RadixTree): Radix tree for quick IP/DNS lookups. + _events (set): Flat set of contained events. 
Examples: Basic usage @@ -85,8 +87,9 @@ def __init__(self, *targets, strict_scope=False): "ORG_STUB": re.compile(r"^ORG:(.*)", re.IGNORECASE), "ASN": re.compile(r"^ASN:(.*)", re.IGNORECASE), } + self._events = set() + self._radix = RadixTarget() - self._events = dict() if len(targets) > 0: log.verbose(f"Creating events from {len(targets):,} targets") for t in targets: @@ -142,17 +145,18 @@ def add_target(self, t, event_type=None): if not str(t).startswith("#"): raise ValidationError(f'Could not add target "{t}": {e}') - try: - self._events[event.host].add(event) - except KeyError: - self._events[event.host] = { - event, - } + radix_data = self._radix.search(event.host) + if radix_data is None: + radix_data = {event} + self._radix.insert(event.host, radix_data) + else: + radix_data.add(event) + self._events.add(event) @property def events(self): """ - A generator property that yields all events in the target. + Returns all events in the target. Yields: Event object: One of the Event objects stored in the `_events` dictionary. @@ -164,14 +168,12 @@ def events(self): Notes: - This property is read-only. - - Iterating over this property gives you one event at a time from the `_events` dictionary. """ - for _events in self._events.values(): - yield from _events + return self._events def copy(self): """ - Creates and returns a copy of the Target object, including a shallow copy of the `_events` attribute. + Creates and returns a copy of the Target object, including a shallow copy of the `_events` and `_radix` attributes. Returns: Target: A new Target object with the sameattributes as the original. @@ -193,12 +195,13 @@ def copy(self): - The `scan` object reference is kept intact in the copied Target object. 
""" self_copy = self.__class__() - self_copy._events = dict(self._events) + self_copy._events = set(self._events) + self_copy._radix = copy.copy(self._radix) return self_copy def get(self, host): """ - Gets the event associated with the specified host from the target's `_events` dictionary. + Gets the event associated with the specified host from the target's radix tree. Args: host (Event, Target, or str): The hostname, IP, URL, or event to look for. @@ -224,15 +227,15 @@ def get(self, host): return if other.host: with suppress(KeyError, StopIteration): - return next(iter(self._events[other.host])) - if is_ip_type(other.host): - for n in ip_network_parents(other.host, include_self=True): - with suppress(KeyError, StopIteration): - return next(iter(self._events[n])) - elif not self.strict_scope: - for h in domain_parents(other.host): - with suppress(KeyError, StopIteration): - return next(iter(self._events[h])) + result = self._radix.search(other.host) + if result is not None: + for event in result: + # if the result is a dns name and strict scope is enabled + if isinstance(result, str) and self.strict_scope: + # if the result doesn't exactly equal the host, abort + if event.host != other.host: + return + return event def _contains(self, other): if self.get(other) is not None: @@ -282,11 +285,11 @@ def __len__(self): - For other types of hosts, each unique event is counted as one. 
""" num_hosts = 0 - for host, _events in self._events.items(): - if type(host) in (ipaddress.IPv4Network, ipaddress.IPv6Network): - num_hosts += host.num_addresses + for event in self._events: + if isinstance(event.host, (ipaddress.IPv4Network, ipaddress.IPv6Network)): + num_hosts += event.host.num_addresses else: - num_hosts += len(_events) + num_hosts += 1 return num_hosts diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index 0ce3e0c761..4e3f3993eb 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -103,16 +103,6 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver): assert helpers.domain_stem("evilcorp.co.uk") == "evilcorp" assert helpers.domain_stem("www.evilcorp.co.uk") == "www.evilcorp" - assert helpers.host_in_host("www.evilcorp.com", "evilcorp.com") == True - assert helpers.host_in_host("asdf.www.evilcorp.com", "evilcorp.com") == True - assert helpers.host_in_host("evilcorp.com", "www.evilcorp.com") == False - assert helpers.host_in_host("evilcorp.com", "evilcorp.com") == True - assert helpers.host_in_host("evilcorp.com", "eevilcorp.com") == False - assert helpers.host_in_host("eevilcorp.com", "evilcorp.com") == False - assert helpers.host_in_host("evilcorp.com", "evilcorp") == False - assert helpers.host_in_host("evilcorp", "evilcorp.com") == False - assert helpers.host_in_host("evilcorp.com", "com") == True - assert tuple(await helpers.re.extract_emails("asdf@asdf.com\nT@t.Com&a=a@a.com__ b@b.com")) == ( "asdf@asdf.com", "t@t.com", diff --git a/poetry.lock b/poetry.lock index 05386f0a43..034b4fef64 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2084,6 +2084,17 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "radixtarget" +version = "1.0.0.15" +description = "Check whether an IP address belongs to a cloud provider" +optional = false +python-versions = "<4.0,>=3.9" 
+files = [ + {file = "radixtarget-1.0.0.15-py3-none-any.whl", hash = "sha256:4e3f0620bfbc0ef2ff3d71270dd281c0e8428906d260f737f82b573a7b636dd8"}, + {file = "radixtarget-1.0.0.15.tar.gz", hash = "sha256:c8294ebbb76e6d2826deaa8fe18d568308eddfd25f20644e166c492d2626a70c"}, +] + [[package]] name = "regex" version = "2024.4.16" @@ -2625,4 +2636,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "15633b02fcedb3d044f4e40a45ce1e9dd7209608a0389175a4523e3810a8504b" +content-hash = "100618fdac0971d8b3662f2bfe72a8fae4f221ca78dfc6a0edf605859ab64f3f" diff --git a/pyproject.toml b/pyproject.toml index 0cc6eed310..1c0c15a9c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ pyzmq = "^25.1.2" regex = "^2024.4.16" unidecode = "^1.3.8" cloudcheck = "^4.0.0.345" +radixtarget = "^1.0.0.15" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From af110c9a6bf4fec254471e63674d11beee1a30c9 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:35:32 -0400 Subject: [PATCH 53/63] better scope tests --- bbot/scanner/target.py | 2 +- bbot/test/test_step_1/test_target.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 1016fd3cf8..7059bda70e 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -231,7 +231,7 @@ def get(self, host): if result is not None: for event in result: # if the result is a dns name and strict scope is enabled - if isinstance(result, str) and self.strict_scope: + if isinstance(event.host, str) and self.strict_scope: # if the result doesn't exactly equal the host, abort if event.host != other.host: return diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index dced8af02c..5215931914 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -38,3 +38,13 @@ def 
test_target(bbot_scanner): assert scan1.target.get("2001:4860:4860::888c") is None assert str(scan1.target.get("www.api.publicapis.org").host) == "api.publicapis.org" assert scan1.target.get("publicapis.org") is None + + from bbot.scanner.target import Target + target = Target("evilcorp.com") + assert not "com" in target + assert "evilcorp.com" in target + assert "www.evilcorp.com" in target + strict_target = Target("evilcorp.com", strict_scope=True) + assert not "com" in strict_target + assert "evilcorp.com" in strict_target + assert not "www.evilcorp.com" in strict_target From 8f72db74e982778b491d5a4bb2fef94fe7779a80 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:35:46 -0400 Subject: [PATCH 54/63] blacked --- bbot/test/test_step_1/test_target.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index 5215931914..cf210c0f69 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -40,6 +40,7 @@ def test_target(bbot_scanner): assert scan1.target.get("publicapis.org") is None from bbot.scanner.target import Target + target = Target("evilcorp.com") assert not "com" in target assert "evilcorp.com" in target From 4f073125ca89d1f523667f15b127d008148a5940 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:42:23 -0400 Subject: [PATCH 55/63] update cloudcheck --- poetry.lock | 9 +++++---- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 034b4fef64..be6fea4105 100644 --- a/poetry.lock +++ b/poetry.lock @@ -388,18 +388,19 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "cloudcheck" -version = "4.0.0.345" +version = "5.0.0.350" description = "Check whether an IP address belongs to a cloud provider" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "cloudcheck-4.0.0.345-py3-none-any.whl", 
hash = "sha256:82a1cecaa0ec35a50d6c1e4884a9535eb4c1c788b845b0c4a91b44935f4dc765"}, - {file = "cloudcheck-4.0.0.345.tar.gz", hash = "sha256:787953a305c0be6e6eb4ceb9990dccb633f9e1429d5ebfda7acf7dca35b3caeb"}, + {file = "cloudcheck-5.0.0.350-py3-none-any.whl", hash = "sha256:6f2ed981818bde6d8b6c5a6413a843e11d0aa1a4bf8b36452dcae1030a537dd6"}, + {file = "cloudcheck-5.0.0.350.tar.gz", hash = "sha256:cb59dfef966268ebc176e242634b84a3423a84ffaf4fac40566f37edfaddc106"}, ] [package.dependencies] httpx = ">=0.26,<0.28" pydantic = ">=2.4.2,<3.0.0" +radixtarget = ">=1.0.0.14,<2.0.0.0" regex = ">=2024.4.16,<2025.0.0" [[package]] @@ -2636,4 +2637,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "100618fdac0971d8b3662f2bfe72a8fae4f221ca78dfc6a0edf605859ab64f3f" +content-hash = "ed8bb07e4ff5a5f665402db33f9016409547bef1ccb6a8c2c626c44fde075abb" diff --git a/pyproject.toml b/pyproject.toml index 1c0c15a9c4..7ba00c488c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,8 +50,8 @@ jinja2 = "^3.1.3" pyzmq = "^25.1.2" regex = "^2024.4.16" unidecode = "^1.3.8" -cloudcheck = "^4.0.0.345" radixtarget = "^1.0.0.15" +cloudcheck = "^5.0.0.350" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 53f71e9af883396da0cb22712f306c5b3129f12c Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:43:09 -0400 Subject: [PATCH 56/63] fix cloudcheck --- bbot/modules/internal/cloud.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index e6bab4baa5..29abef4d2e 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -24,9 +24,9 @@ async def handle_event(self, event, kwargs): hosts_to_check = set(str(s) for s in event.resolved_hosts) hosts_to_check.add(str(event.host_original)) for host in hosts_to_check: - provider, provider_type, subnet = 
self.helpers.cloudcheck(host) - if provider: - event.add_tag(f"{provider_type}-{provider}") + for provider, provider_type, subnet in self.helpers.cloudcheck(host) + if provider: + event.add_tag(f"{provider_type}-{provider}") found = set() # look for cloud assets in hosts, http responses From a3c8e61da81c42decfc6de3659928d0cc6ddba9f Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:48:53 -0400 Subject: [PATCH 57/63] better target tests --- bbot/modules/internal/cloud.py | 2 +- bbot/scanner/target.py | 25 ++++++++++++------------- bbot/test/test_step_1/test_target.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index 29abef4d2e..7939487fd4 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -24,7 +24,7 @@ async def handle_event(self, event, kwargs): hosts_to_check = set(str(s) for s in event.resolved_hosts) hosts_to_check.add(str(event.host_original)) for host in hosts_to_check: - for provider, provider_type, subnet in self.helpers.cloudcheck(host) + for provider, provider_type, subnet in self.helpers.cloudcheck(host): if provider: event.add_tag(f"{provider_type}-{provider}") diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 7059bda70e..b19d1b6a61 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -118,11 +118,8 @@ def add_target(self, t, event_type=None): t = [t] for single_target in t: if type(single_target) == self.__class__: - for k, v in single_target._events.items(): - try: - self._events[k].update(v) - except KeyError: - self._events[k] = set(single_target._events[k]) + for event in single_target.events: + self._add_event(event) else: if is_event(single_target): event = single_target @@ -144,14 +141,7 @@ def add_target(self, t, event_type=None): # allow commented lines if not str(t).startswith("#"): raise ValidationError(f'Could not add target "{t}": {e}') - - radix_data = 
self._radix.search(event.host) - if radix_data is None: - radix_data = {event} - self._radix.insert(event.host, radix_data) - else: - radix_data.add(event) - self._events.add(event) + self._add_event(event) @property def events(self): @@ -237,6 +227,15 @@ def get(self, host): return return event + def _add_event(self, event): + radix_data = self._radix.search(event.host) + if radix_data is None: + radix_data = {event} + self._radix.insert(event.host, radix_data) + else: + radix_data.add(event) + self._events.add(event) + def _contains(self, other): if self.get(other) is not None: return True diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index cf210c0f69..ed5c1b7efb 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -49,3 +49,14 @@ def test_target(bbot_scanner): assert not "com" in strict_target assert "evilcorp.com" in strict_target assert not "www.evilcorp.com" in strict_target + + target = Target() + target.add_target("evilcorp.com") + assert not "com" in target + assert "evilcorp.com" in target + assert "www.evilcorp.com" in target + strict_target = Target(strict_scope=True) + strict_target.add_target("evilcorp.com") + assert not "com" in strict_target + assert "evilcorp.com" in strict_target + assert not "www.evilcorp.com" in strict_target From d42c189bf4eef282cd6bcb3d7c8b41143455bdab Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 18:00:46 -0400 Subject: [PATCH 58/63] fix typo --- bbot/core/helpers/dns/dns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 2d78d2c196..5b5365f282 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -153,7 +153,7 @@ def _wildcard_prevalidation(self, host): wildcard_ignore = self.wildcard_ignore.search(host) if wildcard_ignore: log.debug( - f"Skipping wildcard detection on {host} because it or its parent domai 
({wildcard_ignore}) is excluded in the config" + f"Skipping wildcard detection on {host} because {wildcard_ignore} is excluded in the config" ) return False From 8c07684ca7364966cf5b688e374ce6e2e4134415 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 18:18:28 -0400 Subject: [PATCH 59/63] better dns name sanitization --- bbot/core/helpers/dns/dns.py | 4 +--- bbot/core/helpers/dns/engine.py | 21 ++++++++++++++------- bbot/test/test_step_1/test_dns.py | 11 +++++++++++ 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 5b5365f282..7f775483ca 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -152,9 +152,7 @@ def _wildcard_prevalidation(self, host): # skip check if the query's parent domain is excluded in the config wildcard_ignore = self.wildcard_ignore.search(host) if wildcard_ignore: - log.debug( - f"Skipping wildcard detection on {host} because {wildcard_ignore} is excluded in the config" - ) + log.debug(f"Skipping wildcard detection on {host} because {wildcard_ignore} is excluded in the config") return False return host diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index b8e184264b..6018e0e3f8 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -403,7 +403,8 @@ def new_task(query, rdtype): if queries: # Start a new task for each one completed, if URLs remain new_task(*queries.pop(0)) - def extract_targets(self, record): + @staticmethod + def extract_targets(record): """ Extracts hostnames or IP addresses from a given DNS record. 
@@ -429,24 +430,30 @@ def extract_targets(self, record): """ results = set() + + def add_result(rdtype, _record): + cleaned = clean_dns_record(_record) + if cleaned: + results.add((rdtype, cleaned)) + rdtype = str(record.rdtype.name).upper() if rdtype in ("A", "AAAA", "NS", "CNAME", "PTR"): - results.add((rdtype, clean_dns_record(record))) + add_result(rdtype, record) elif rdtype == "SOA": - results.add((rdtype, clean_dns_record(record.mname))) + add_result(rdtype, record.mname) elif rdtype == "MX": - results.add((rdtype, clean_dns_record(record.exchange))) + add_result(rdtype, record.exchange) elif rdtype == "SRV": - results.add((rdtype, clean_dns_record(record.target))) + add_result(rdtype, record.target) elif rdtype == "TXT": for s in record.strings: s = smart_decode(s) for match in dns_name_regex.finditer(s): start, end = match.span() host = s[start:end] - results.add((rdtype, host)) + add_result(rdtype, host) elif rdtype == "NSEC": - results.add((rdtype, clean_dns_record(record.next))) + add_result(rdtype, record.next) else: log.warning(f'Unknown DNS record type "{rdtype}"') return results diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index afc5c1967f..05796e4645 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -29,6 +29,17 @@ async def test_dns_engine(bbot_scanner): pass_2 = True assert pass_1 and pass_2 + from bbot.core.helpers.dns.engine import DNSEngine + from bbot.core.helpers.dns.mock import MockResolver + + # ensure dns records are being properly cleaned + mockresolver = MockResolver({"evilcorp.com": {"MX": ["0 ."]}}) + mx_records = await mockresolver.resolve("evilcorp.com", rdtype="MX") + results = set() + for r in mx_records: + results.update(DNSEngine.extract_targets(r)) + assert not results + @pytest.mark.asyncio async def test_dns_resolution(bbot_scanner): From e4fd60af06d6e9570105f7fdf49bd6c4d7d46661 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 
19:06:51 -0400 Subject: [PATCH 60/63] fix ffuf tests --- .../test_step_2/module_tests/test_module_ffuf_shortnames.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py b/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py index cbbec11ea6..1f624a4104 100644 --- a/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py +++ b/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py @@ -143,7 +143,7 @@ async def setup_after_prep(self, module_test): tags=["shortname-file"], ) ) - module_test.scan.target._events["http://127.0.0.1:8888"] = seed_events + module_test.scan.target._events = set(seed_events) expect_args = {"method": "GET", "uri": "/administrator.aspx"} respond_args = {"response_data": "alive"} From faf61eecd47c6ef88aa9b317ac80a8700b064439 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 11:25:37 -0400 Subject: [PATCH 61/63] small scope tweak --- bbot/core/event/base.py | 2 +- bbot/scanner/manager.py | 1 - .../test_manager_scope_accuracy.py | 28 +++---- bbot/test/test_step_1/test_scope.py | 75 ++++++++++++++++++- 4 files changed, 88 insertions(+), 18 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 8c69d829d0..d7eabd6db5 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -93,7 +93,7 @@ class BaseEvent: # Always emit this event type even if it's not in scope _always_emit = False # Always emit events with these tags even if they're not in scope - _always_emit_tags = ["affiliate"] + _always_emit_tags = ["affiliate", "target"] # Bypass scope checking and dns resolution, distribute immediately to modules # This is useful for "end-of-line" events like FINDING and VULNERABILITY _quick_emit = False diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 76d7b6028b..6fa59cf3e2 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -43,7 +43,6 @@ async def 
init_events(self, events): sorted_events = sorted(events, key=lambda e: len(e.data)) for event in [self.scan.root_event] + sorted_events: event._dummy = False - event.scope_distance = 0 event.web_spider_distance = 0 event.scan = self.scan if event.source is None: diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index bc79a0029e..dbca452769 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -750,12 +750,12 @@ def custom_setup(scan): "127.0.0.0/31", modules=["sslcert"], whitelist=["127.0.1.0"], - _config={"dns_resolution": False, "scope_report_distance": 0, "speculate": True, "modules": {"speculate": {"ports": "9999"}}}, + _config={"dns_resolution": False, "scope_report_distance": 0, "scope_search_distance": 1, "speculate": True, "modules": {"speculate": {"ports": "9999"}}}, _dns_mock={"www.bbottest.notreal": {"A": ["127.0.0.1"]}, "test.notreal": {"A": ["127.0.1.0"]}}, ) assert len(events) == 3 - assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) @@ -765,30 +765,30 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) assert len(all_events) == 11 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == 
"IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 2]) + assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 2]) assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "sslcert"]) + assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 3 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) assert len(all_events_nodups) == 9 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and 
e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "sslcert"]) + assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 3 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 
0 and str(e.module) == "speculate"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 5 - assert 1 == len([e for e in graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) assert 0 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) + assert 1 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 2]) assert 0 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) assert 1 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) diff --git a/bbot/test/test_step_1/test_scope.py b/bbot/test/test_step_1/test_scope.py index e51fec9735..7435b82af7 100644 --- a/bbot/test/test_step_1/test_scope.py +++ b/bbot/test/test_step_1/test_scope.py @@ -2,10 +2,58 @@ from ..test_step_2.module_tests.base import ModuleTestBase -class Scope_test_blacklist(ModuleTestBase): +class TestScopeBaseline(ModuleTestBase): targets = ["http://127.0.0.1:8888"] modules_overrides = ["httpx"] + async def setup_after_prep(self, module_test): + expect_args = {"method": "GET", "uri": "/"} + respond_args = {"response_data": "alive"} + module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) + + def 
check(self, module_test, events): + assert len(events) == 6 + assert 1 == len( + [ + e + for e in events + if e.type == "URL_UNVERIFIED" + and str(e.host) == "127.0.0.1" + and e.scope_distance == 0 + and "target" in e.tags + ] + ) + # we have two of these because the host module considers "always_emit" in its outgoing deduplication + assert 2 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" + and e.data == "127.0.0.1" + and e.scope_distance == 0 + and str(e.module) == "host" + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "HTTP_RESPONSE" + and str(e.host) == "127.0.0.1" + and e.port == 8888 + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and str(e.host) == "127.0.0.1" and e.port == 8888 and e.scope_distance == 0 + ] + ) + + +class TestScopeBlacklist(TestScopeBaseline): blacklist = ["127.0.0.1"] async def setup_after_prep(self, module_test): @@ -14,9 +62,32 @@ async def setup_after_prep(self, module_test): module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) def check(self, module_test, events): + assert len(events) == 1 assert not any(e.type == "URL" for e in events) + assert not any(str(e.host) == "127.0.0.1" for e in events) -class Scope_test_whitelist(Scope_test_blacklist): +class TestScopeWhitelist(TestScopeBlacklist): blacklist = [] whitelist = ["255.255.255.255"] + + def check(self, module_test, events): + assert len(events) == 3 + assert not any(e.type == "URL" for e in events) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.scope_distance == 1 and "target" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL_UNVERIFIED" + and str(e.host) == "127.0.0.1" + and e.scope_distance == 1 + and "target" in e.tags + ] + ) From 630c87e5bf686034239e3ecdc1293e37acd5a0a8 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 12:24:56 -0400 Subject: [PATCH 
62/63] remove resolved/unresolved tags as they are redundant --- bbot/modules/anubisdb.py | 2 +- bbot/modules/internal/dns.py | 4 +--- bbot/test/test_step_1/test_dns.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/bbot/modules/anubisdb.py b/bbot/modules/anubisdb.py index 9864e3c6d8..bf4c88e935 100644 --- a/bbot/modules/anubisdb.py +++ b/bbot/modules/anubisdb.py @@ -30,7 +30,7 @@ def abort_if_pre(self, hostname): async def abort_if(self, event): # abort if dns name is unresolved - if not "resolved" in event.tags: + if event.type == "DNS_NAME_UNRESOLVED": return True, "DNS name is unresolved" return await super().abort_if(event) diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index ea5e4efcf1..b96b9b19c7 100644 --- a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -94,9 +94,7 @@ async def handle_event(self, event, kwargs): if rdtype not in dns_children: dns_tags.add(f"{rdtype.lower()}-error") - if dns_children: - dns_tags.add("resolved") - elif not event_is_ip: + if not dns_children and not event_is_ip: dns_tags.add("unresolved") for rdtype, children in dns_children.items(): diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index afc5c1967f..7fc93ab79a 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -227,7 +227,6 @@ async def test_wildcards(bbot_scanner): "a-record", "target", "aaaa-wildcard", - "resolved", "in-scope", "subdomain", "aaaa-record", @@ -249,7 +248,7 @@ async def test_wildcards(bbot_scanner): for e in events if e.type == "DNS_NAME" and e.data == "asdfl.gashdgkjsadgsdf.github.io" - and all(t in e.tags for t in ("a-record", "target", "resolved", "in-scope", "subdomain", "aaaa-record")) + and all(t in e.tags for t in ("a-record", "target", "in-scope", "subdomain", "aaaa-record")) and not any(t in e.tags for t in ("wildcard", "a-wildcard", "aaaa-wildcard")) ] ) From c770d83acf77d6a6fb3bf4cf9bffb103fb7d4391 Mon Sep 17 
00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 12:53:57 -0400 Subject: [PATCH 63/63] better engine error handling during scan cancellation --- bbot/core/engine.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bbot/core/engine.py b/bbot/core/engine.py index c72eecbb32..24781ab3b3 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -153,19 +153,26 @@ async def run_and_return(self, client_id, command_fn, **kwargs): error = f"Unhandled error in {self.name}.{command_fn.__name__}({kwargs}): {e}" trace = traceback.format_exc() result = {"_e": (error, trace)} - await self.socket.send_multipart([client_id, pickle.dumps(result)]) + await self.send_socket_multipart([client_id, pickle.dumps(result)]) async def run_and_yield(self, client_id, command_fn, **kwargs): self.log.debug(f"{self.name} run-and-yield {command_fn.__name__}({kwargs})") try: async for _ in command_fn(**kwargs): - await self.socket.send_multipart([client_id, pickle.dumps(_)]) - await self.socket.send_multipart([client_id, pickle.dumps({"_s": None})]) + await self.send_socket_multipart([client_id, pickle.dumps(_)]) + await self.send_socket_multipart([client_id, pickle.dumps({"_s": None})]) except Exception as e: error = f"Unhandled error in {self.name}.{command_fn.__name__}({kwargs}): {e}" trace = traceback.format_exc() result = {"_e": (error, trace)} - await self.socket.send_multipart([client_id, pickle.dumps(result)]) + await self.send_socket_multipart([client_id, pickle.dumps(result)]) + + async def send_socket_multipart(self, *args, **kwargs): + try: + await self.socket.send_multipart(*args, **kwargs) + except Exception as e: + self.log.warning(f"Error sending ZMQ message: {e}") + self.log.trace(traceback.format_exc()) async def worker(self): try: