1919from urllib .parse import urlparse , urljoin , parse_qs
2020
2121
22- from .helpers import *
2322from bbot .errors import *
23+ from .helpers import EventSeed
2424from bbot .core .helpers import (
2525 extract_words ,
2626 is_domain ,
@@ -111,18 +111,66 @@ class BaseEvent:
111111 # Bypass scope checking and dns resolution, distribute immediately to modules
112112 # This is useful for "end-of-line" events like FINDING and VULNERABILITY
113113 _quick_emit = False
114- # Whether this event has been retroactively marked as part of an important discovery chain
115- _graph_important = False
116- # Disables certain data validations
117- _dummy = False
118114 # Data validation, if data is a dictionary
119115 _data_validator = None
120116 # Whether to increment scope distance if the child and parent hosts are the same
117+ # Normally we don't want this, since scope distance only increases if the host changes
118+ # But for some events like SOCIAL media profiles, this is required to prevent spidering all of facebook.com
121119 _scope_distance_increment_same_host = False
122120 # Don't allow duplicates to occur within a parent chain
123121 # In other words, don't emit the event if the same one already exists in its discovery context
124122 _suppress_chain_dupes = False
125123
124+ # using __slots__ dramatically reduces memory usage in large scans
125+ __slots__ = [
126+ # Core identification attributes
127+ "_uuid" ,
128+ "_id" ,
129+ "_hash" ,
130+ "_data" ,
131+ "_data_hash" ,
132+ # Host-related attributes
133+ "__host" ,
134+ "_host_original" ,
135+ "_port" ,
136+ # Parent-related attributes
137+ "_parent" ,
138+ "_parent_id" ,
139+ "_parent_uuid" ,
140+ # Event metadata
141+ "_type" ,
142+ "_tags" ,
143+ "_omit" ,
144+ "__words" ,
145+ "_priority" ,
146+ "_scope_distance" ,
147+ "_module_priority" ,
148+ "_graph_important" ,
149+ "_resolved_hosts" ,
150+ "_discovery_context" ,
151+ "_discovery_context_regex" ,
152+ "_stats_recorded" ,
153+ "_internal" ,
154+ "_confidence" ,
155+ "_dummy" ,
156+ "_module" ,
157+ # DNS-related attributes
158+ "dns_children" ,
159+ "raw_dns_records" ,
160+ "dns_resolve_distance" ,
161+ # Web-related attributes
162+ "web_spider_distance" ,
163+ "parsed_url" ,
164+ "url_extension" ,
165+ "num_redirects" ,
166+ # File-related attributes
167+ "_data_path" ,
168+ # Public attributes
169+ "module" ,
170+ "scan" ,
171+ "timestamp" ,
172+ ]
173+
126174 def __init__ (
127175 self ,
128176 data ,
@@ -131,7 +179,6 @@ def __init__(
131179 context = None ,
132180 module = None ,
133181 scan = None ,
134- scans = None ,
135182 tags = None ,
136183 confidence = 100 ,
137184 timestamp = None ,
@@ -150,7 +197,6 @@ def __init__(
150197 parent (BaseEvent, optional): Parent event that led to this event's discovery. Defaults to None.
151198 module (str, optional): Module that discovered the event. Defaults to None.
152199 scan (Scan, optional): BBOT Scan object. Required unless _dummy is True. Defaults to None.
153- scans (list of Scan, optional): BBOT Scan objects, used primarily when unserializing an Event from the database. Defaults to None.
154200 tags (list of str, optional): Descriptive tags for the event. Defaults to None.
155201 confidence (int, optional): Confidence level for the event, on a scale of 1-100. Defaults to 100.
156202 timestamp (datetime, optional): Time of event discovery. Defaults to current UTC time.
@@ -176,6 +222,7 @@ def __init__(
176222 self ._host_original = None
177223 self ._scope_distance = None
178224 self ._module_priority = None
225+ self ._graph_important = False
179226 self ._resolved_hosts = set ()
180227 self .dns_children = {}
181228 self .raw_dns_records = {}
@@ -206,12 +253,6 @@ def __init__(
206253 self .scan = scan
207254 if (not self .scan ) and (not self ._dummy ):
208255 raise ValidationError ("Must specify scan" )
209- # self.scans holds a list of scan IDs from scans that encountered this event
210- self .scans = []
211- if scans is not None :
212- self .scans = scans
213- if self .scan :
214- self .scans = list (set ([self .scan .id ] + self .scans ))
215256
216257 try :
217258 self .data = self ._sanitize_data (data )
@@ -1350,7 +1391,7 @@ def sanitize_data(self, data):
13501391 return validators .validate_email (data )
13511392
def _host(self):
    """Derive this event's host from the part of its data after the last "@".

    The data is stringified, everything up to and including the final "@"
    is discarded, and the remainder is handed to ``split_host_port``.

    Side effects:
        Stores the port returned by ``split_host_port`` on ``self._port``.

    Returns:
        The host component returned by ``split_host_port``.
    """
    # Only the text after the final "@" matters, so a single right-split
    # is enough; there is no need to split on every "@" in the string.
    data = str(self.data).rsplit("@", 1)[-1]
    host, self._port = split_host_port(data)
    return host
13561397
@@ -1654,7 +1695,6 @@ def make_event(
16541695 context = None ,
16551696 module = None ,
16561697 scan = None ,
1657- scans = None ,
16581698 tags = None ,
16591699 confidence = 100 ,
16601700 dummy = False ,
@@ -1714,12 +1754,11 @@ def make_event(
17141754 tags = [tags ]
17151755 tags = set (tags )
17161756
1757+ # if data is already an event, update it with the user's kwargs
17171758 if is_event (data ):
17181759 event = copy (data )
17191760 if scan is not None and not event .scan :
17201761 event .scan = scan
1721- if scans is not None and not event .scans :
1722- event .scans = scans
17231762 if module is not None :
17241763 event .module = module
17251764 if parent is not None :
@@ -1733,8 +1772,11 @@ def make_event(
17331772 event_type = data .type
17341773 return event
17351774 else :
1775+ # if event_type is not provided, autodetect it
17361776 if event_type is None :
1737- event_type , data = get_event_type (data )
1777+ event_seed = EventSeed (data )
1778+ event_type = event_seed .type
1779+ data = event_seed .data
17381780 if not dummy :
17391781 log .debug (f'Autodetected event type "{ event_type } " based on data: "{ data } "' )
17401782
@@ -1778,7 +1820,6 @@ def make_event(
17781820 context = context ,
17791821 module = module ,
17801822 scan = scan ,
1781- scans = scans ,
17821823 tags = tags ,
17831824 confidence = confidence ,
17841825 _dummy = dummy ,
@@ -1812,7 +1853,6 @@ def event_from_json(j):
18121853 event_type = j ["type" ]
18131854 kwargs = {
18141855 "event_type" : event_type ,
1815- "scans" : j .get ("scans" , []),
18161856 "tags" : j .get ("tags" , []),
18171857 "confidence" : j .get ("confidence" , 100 ),
18181858 "context" : j .get ("discovery_context" , None ),
0 commit comments