From a1e1e4dc9bd81abf6e03e6eae955409c6aa215be Mon Sep 17 00:00:00 2001 From: Cecilia Stevens <63068179+ceciliastevens@users.noreply.github.com> Date: Tue, 17 Mar 2026 12:32:14 -0400 Subject: [PATCH 1/3] INTEG-3135 - grouped searches --- CHANGELOG.md | 8 + docs/sdk/clients/file_event_queries.md | 8 + docs/sdk/models.md | 10 + src/_incydr_cli/cmds/file_events.py | 124 +++++++++++- src/_incydr_sdk/file_events/client.py | 24 +++ .../file_events/models/response.py | 37 ++++ src/_incydr_sdk/queries/file_events.py | 60 ++++-- src/incydr/__init__.py | 2 + src/incydr/models.py | 4 + tests/test_file_events.py | 190 +++++++++++++++++- 10 files changed, 449 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9cc91f0..7bc21bd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,14 @@ how a consumer would use the library or CLI tool (e.g. adding unit tests, updating documentation, etc) are not captured here. +## Unreleased + +### Added +- Added the `sdk.file_events.v2.search_groups` method to get approximate aggregate file event counts by a given grouping term. +- Added the `GroupingEventQuery` class, used to make these queries. +- Added the cli command `incydr file-events search-groups` to get approximate aggregate file event counts by a given grouping term. + + ## 2.11.0 - 2026-02-10 ### Added diff --git a/docs/sdk/clients/file_event_queries.md b/docs/sdk/clients/file_event_queries.md index c75f3d52..66218558 100644 --- a/docs/sdk/clients/file_event_queries.md +++ b/docs/sdk/clients/file_event_queries.md @@ -8,6 +8,14 @@ Use the `EventQuery` class to create a query for searching and filtering file ev :docstring: :members: equals not_equals exists does_not_exist greater_than less_than matches_any is_any is_none date_range subquery +## GroupingEventQuery Class + +Use the `GroupingEventQuery` class to create a query for searching for approximate event counts, grouped by a term called the `grouping_term`. 
`GroupingEventQuery` supports all of the same operators as `EventQuery`, with the addition of `group_by` and `maximum_size`, which can be used to control the grouping term and the maximum size of the response. + +::: _incydr_sdk.queries.file_events.GroupingEventQuery + :docstring: + :members: group_by maximum_size + ## Query Building The `EventQuery` class can be imported directly from the `incydr` module. diff --git a/docs/sdk/models.md b/docs/sdk/models.md index 20d61b70..dd4015fe 100644 --- a/docs/sdk/models.md +++ b/docs/sdk/models.md @@ -171,6 +171,16 @@ Devices has been replaced by [Agents](#agents). ::: incydr.models.SavedSearch :docstring: +### `GroupedFileEventResponse` model + +::: incydr.models.GroupedFileEventResponse + :docstring: + +### `FileEventGroup` model + +::: incydr.models.FileEventGroup + :docstring: + ## Roles --- diff --git a/src/_incydr_cli/cmds/file_events.py b/src/_incydr_cli/cmds/file_events.py index 69dfe80e..804b6496 100644 --- a/src/_incydr_cli/cmds/file_events.py +++ b/src/_incydr_cli/cmds/file_events.py @@ -31,8 +31,10 @@ from _incydr_sdk.enums.file_events import RiskIndicators from _incydr_sdk.enums.file_events import RiskSeverity from _incydr_sdk.file_events.models.event import FileEventV2 +from _incydr_sdk.file_events.models.response import FileEventGroup from _incydr_sdk.file_events.models.response import SavedSearch from _incydr_sdk.queries.file_events import EventQuery +from _incydr_sdk.queries.file_events import GroupingEventQuery from _incydr_sdk.utils import model_as_card @@ -100,7 +102,7 @@ def search( elif advanced_query: if not isinstance(advanced_query, str): advanced_query = advanced_query.read() - query = EventQuery.parse_raw(advanced_query) + query = EventQuery.model_validate_json(advanced_query) else: if not start: raise BadOptionUsage( @@ -108,6 +110,7 @@ def search( "--start option required if not using --saved-search or --advanced-query options.", ) query = _create_query( + cls=EventQuery, start=start, end=end, 
event_action=event_action, @@ -191,6 +194,121 @@ def yield_all_events(q: EventQuery): console.print("No results found.") +@file_events.command(cls=IncydrCommand) +@click.option( + "--group-by", + default=None, + help="(required) The term by which approximate counts will be grouped. Example: `user.email`.", + required=True, +) +@table_format_option +@columns_option +@output_options +@advanced_query_option +@saved_search_option +@event_filter_options +@logging_options +def search_groups( + format_: TableFormat, + columns: Optional[str], + output: Optional[str], + certs: Optional[str], + ignore_cert_validation: Optional[bool], + advanced_query: Optional[Union[str, File]], + saved_search: Optional[str], + start: Optional[str], + end: Optional[str], + event_action: Optional[str], + username: Optional[str], + md5: Optional[str], + sha256: Optional[str], + source_category: Optional[str], + destination_category: Optional[str], + file_name: Optional[str], + file_directory: Optional[str], + file_category: Optional[str], + risk_indicator: Optional[RiskIndicators], + risk_severity: Optional[RiskSeverity], + risk_score: Optional[int], + group_by: Optional[str], +): + """ + Retrieve approximate aggregated file event counts. Various options are provided to filter query results. + + Use the `--saved-search` or the `--advanced-query` option if the available filters don't satisfy your requirements. + + Results will be output to the console by default, use the `--output` option to send data to a server. + + This method returns approximate counts, grouped by the provided term. To obtain full event details, use the `search` method. 
+ """ + if output: + format_ = TableFormat.json_lines + + client = Client() + + if not group_by: + raise BadOptionUsage( + "group_by", + "--group-by option is required.", + ) + + if saved_search: + saved_search = client.file_events.v2.get_saved_search(saved_search) + query = GroupingEventQuery.from_saved_search(saved_search) + elif advanced_query: + if not isinstance(advanced_query, str): + advanced_query = advanced_query.read() + query = GroupingEventQuery.model_validate_json(advanced_query) + else: + if not start: + raise BadOptionUsage( + "start", + "--start option required if not using --saved-search or --advanced-query options.", + ) + query = _create_query( + cls=GroupingEventQuery, + start=start, + end=end, + event_action=event_action, + username=username, + md5=md5, + sha256=sha256, + source_category=source_category, + destination_category=destination_category, + file_name=file_name, + file_directory=file_directory, + file_category=file_category, + risk_indicator=risk_indicator, + risk_severity=risk_severity, + risk_score=risk_score, + ) + + query.group_by(group_by).maximum_size(10000) + + groups = client.file_events.v2.search_groups(query).groups or [] + + if output: + logger = get_server_logger(output, certs, ignore_cert_validation) + for group in groups: + logger.info(json.dumps(group)) + return + + if format_ == TableFormat.csv: + render.csv(FileEventGroup, groups, columns=columns, flat=True) + elif format_ == TableFormat.table: + render.table(FileEventGroup, groups, columns=columns, flat=False) + else: + printed = False + for group in groups: + printed = True + if format_ == TableFormat.json_pretty: + console.print_json(data=group.dict()) + else: + click.echo(json.dumps(group.dict())) + if not printed: + console.print("No results found.") + + @file_events.command() @click.argument("checkpoint-name") def clear_checkpoint(checkpoint_name: str): @@ -262,8 +380,8 @@ def list_saved_searches( } -def _create_query(**kwargs): - query = 
EventQuery(start_date=kwargs["start"], end_date=kwargs["end"]) +def _create_query(cls, **kwargs): + query = cls(start_date=kwargs["start"], end_date=kwargs["end"]) for k, v in kwargs.items(): if v: if k in ["start", "end"]: diff --git a/src/_incydr_sdk/file_events/client.py b/src/_incydr_sdk/file_events/client.py index 00ac95b6..83d30299 100644 --- a/src/_incydr_sdk/file_events/client.py +++ b/src/_incydr_sdk/file_events/client.py @@ -7,8 +7,10 @@ from ..exceptions import IncydrException from .models.response import FileEventsPage +from .models.response import GroupedFileEventResponse from .models.response import SavedSearch from _incydr_sdk.queries.file_events import EventQuery +from _incydr_sdk.queries.file_events import GroupingEventQuery class InvalidQueryException(IncydrException): @@ -74,6 +76,28 @@ def search(self, query: EventQuery) -> FileEventsPage: query.page_token = page.next_pg_token return page + def search_groups(self, query: GroupingEventQuery) -> GroupedFileEventResponse: + """ + Search for file event counts by a grouping term. + + **Parameters**: + + * **query**: `GroupingEventQuery` (required) - The query object to group file events by a given field. + + **Returns**: A [`GroupedFileEventResponse`][groupedfileeventresponse-model] object.""" + self._mount_retry_adapter() + + try: + response = self._parent.session.post( + "/v2/file-events/grouping", json=query.dict() + ) + except HTTPError as err: + if err.response.status_code == 400: + raise InvalidQueryException(query=query, exception=err) + raise err + response = GroupedFileEventResponse.parse_response(response) + return response + def list_saved_searches(self) -> List[SavedSearch]: """ Get all saved searches. 
diff --git a/src/_incydr_sdk/file_events/models/response.py b/src/_incydr_sdk/file_events/models/response.py index 75e93d18..ed6390cc 100644 --- a/src/_incydr_sdk/file_events/models/response.py +++ b/src/_incydr_sdk/file_events/models/response.py @@ -218,3 +218,40 @@ class SavedSearch(ResponseModel): description="Search term for sorting.", examples=["event.id"], ) + + +class FileEventGroup(ResponseModel): + """A model representing a single group in a grouped response. + + **Fields:** + + * **value**: `str` - The value of the term for this group. + * **doc_count**: `int` - The approximate count of hits matching this value for your query. + """ + + value: Optional[str] = Field( + None, description="The value of the term for this group." + ) + doc_count: Optional[int] = Field( + None, + description="The approximate count of hits matching this value for your query.", + alias="docCount", + ) + + +class GroupedFileEventResponse(ResponseModel): + """A model representing a response of grouped file events. + + **Fields:** + + * **groups**: `List[FileEventGroup]` - A list of file event counts by grouping term and doc count. + * **problems**: `List[QueryProblem]` - List of problems in the request. A problem with a search request could be an invalid filter value, an operator that can't be used on a term, etc. + """ + + groups: Optional[List[FileEventGroup]] = Field( + None, description="A list of file event counts by grouping term and doc count." + ) + problems: Optional[List[QueryProblem]] = Field( + None, + description="List of problems in the request. 
A problem with a search request could be an invalid filter value, an operator that can't be used on a term, etc.", + ) diff --git a/src/_incydr_sdk/queries/file_events.py b/src/_incydr_sdk/queries/file_events.py index e88caa80..ef0f8c44 100644 --- a/src/_incydr_sdk/queries/file_events.py +++ b/src/_incydr_sdk/queries/file_events.py @@ -106,23 +106,13 @@ class Query(Model): srtKey: EventSearchTerm = "event.id" -class EventQuery(Model): +class BaseEventQuery(Model): """ - Class to build a file event query. Use the class methods to attach additional filter operators. - - **Parameters**: - - * **start_date**: `int`, `float`, `str`, `datetime`, `timedelta` - Start of the date range to query for events. Defaults to None. - * **end_date**: `int`, `float`, `str`, `datetime` - End of the date range to query for events. Defaults to None. + Base class used for EventQuery and GroupingEventQuery. """ group_clause: str = Field("AND", alias="groupClause") groups: Optional[List[FilterGroup]] - page_num: int = Field(1, alias="pgNum") - page_size: Annotated[int, Field(le=10000)] = Field(100, alias="pgSize") - page_token: Optional[str] = Field("", alias="pgToken") - sort_dir: str = Field("asc", alias="srtDir") - sort_key: EventSearchTerm = Field("event.id", alias="srtKey") model_config = ConfigDict( validate_assignment=True, use_enum_values=True, @@ -424,6 +414,52 @@ def from_saved_search(cls, saved_search: SavedSearch): return query +class EventQuery(BaseEventQuery): + """ + Class to build a file event query. Use the class methods to attach additional filter operators. + + **Parameters**: + + * **start_date**: `int`, `float`, `str`, `datetime`, `timedelta` - Start of the date range to query for events. Defaults to None. + * **end_date**: `int`, `float`, `str`, `datetime` - End of the date range to query for events. Defaults to None. 
+ """ + + page_num: int = Field(1, alias="pgNum") + page_size: Annotated[int, Field(le=10000)] = Field(100, alias="pgSize") + page_token: Optional[str] = Field("", alias="pgToken") + sort_dir: str = Field("asc", alias="srtDir") + sort_key: EventSearchTerm = Field("event.id", alias="srtKey") + + +class GroupingEventQuery(BaseEventQuery): + """ + Class to build a file event query for use in grouped searches, which return aggregated counts. The `grouping_term` parameter determines by what term the result will be aggregated. + + Use the class methods to attach additional filter operators. + + **Parameters**: + + * **start_date**: `int`, `float`, `str`, `datetime`, `timedelta` - Start of the date range to query for events. Defaults to None. + * **end_date**: `int`, `float`, `str`, `datetime` - End of the date range to query for events. Defaults to None. + * **grouping_term**: `str` - The search term to use to form the groups + * **size**: `int` - The maximum number of groups that will be returned for this query. Default value is 1000. Maximum possible value is 10,000. + """ + + grouping_term: str = Field("", alias="groupingTerm") + size: int = Field(1000) + + def group_by(self, grouping_term: str): + """Sets the grouping term for this query. When the query is run, it will group events by this term. For example, to group by file category, + set the term to `file.category`""" + self.grouping_term = grouping_term + return self + + def maximum_size(self, size: int): + """Sets the maximum number of groups that will be returned for this query. Defaults to 1000. 
Maximum possible value supported by the API is 10000.""" + self.size = size + return self + + def _create_date_range_filter_group(start_date, end_date, term=None): def _validate_duration_str(iso_duration_str): try: diff --git a/src/incydr/__init__.py b/src/incydr/__init__.py index 4e72a972..f29dde1d 100644 --- a/src/incydr/__init__.py +++ b/src/incydr/__init__.py @@ -8,12 +8,14 @@ from _incydr_sdk.core.client import Client from _incydr_sdk.queries.alerts import AlertQuery from _incydr_sdk.queries.file_events import EventQuery +from _incydr_sdk.queries.file_events import GroupingEventQuery __all__ = [ "__version__", "Client", "AlertQuery", "EventQuery", + "GroupingEventQuery", "models", "exceptions", ] diff --git a/src/incydr/models.py b/src/incydr/models.py index 1c33b612..cf7ab167 100644 --- a/src/incydr/models.py +++ b/src/incydr/models.py @@ -21,7 +21,9 @@ from _incydr_sdk.directory_groups.models import DirectoryGroupsPage from _incydr_sdk.file_events.models.event import FileEventV2 from _incydr_sdk.file_events.models.event import User +from _incydr_sdk.file_events.models.response import FileEventGroup from _incydr_sdk.file_events.models.response import FileEventsPage +from _incydr_sdk.file_events.models.response import GroupedFileEventResponse from _incydr_sdk.file_events.models.response import SavedSearch from _incydr_sdk.risk_profiles.models import RiskProfile from _incydr_sdk.risk_profiles.models import RiskProfilesPage @@ -69,6 +71,8 @@ "SavedSearch", "FileEventsPage", "FileEventV2", + "GroupedFileEventResponse", + "FileEventGroup", "User", "UsersPage", "UserRole", diff --git a/tests/test_file_events.py b/tests/test_file_events.py index be58049c..26910feb 100644 --- a/tests/test_file_events.py +++ b/tests/test_file_events.py @@ -18,6 +18,7 @@ from _incydr_sdk.file_events.models.response import SearchFilter from _incydr_sdk.file_events.models.response import SearchFilterGroup from _incydr_sdk.queries.file_events import EventQuery +from 
_incydr_sdk.queries.file_events import GroupingEventQuery MICROSECOND_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" @@ -781,6 +782,24 @@ "totalCount": 1 }""" +TEST_GROUPING_RESPONSE = """{"groups":[{"value":"email1@example.com","docCount":302},{"value":"email2@example.com","docCount":166},{"value":"email3@example.com","docCount":73},{"value":"email4@example.com","docCount":21},{"value":"email5@example.com","docCount":14},{"value":"email6@example.com","docCount":7},{"value":"email7@example.com","docCount":1}],"problems":null}""" + +TEST_GROUPING_QUERY = ( + GroupingEventQuery(start_date="P14D") + .equals("user.email", ["test@code42.com", "john.doe@code42.com"]) + .equals("file.category", "SourceCode") + .group_by("user.email") +) + + +@pytest.fixture +def mock_grouped_search(httpserver_auth): + httpserver_auth.expect_request( + "/v2/file-events/grouping", + method="POST", + json=json.loads(TEST_GROUPING_QUERY.json()), + ).respond_with_json(json.loads(TEST_GROUPING_RESPONSE)) + @pytest.fixture def mock_get_saved_search(httpserver_auth): @@ -811,6 +830,14 @@ def mock_list_saved_searches(httpserver_auth): ).respond_with_json(search_data) +def test_search_groups_sends_expected_query( + mock_grouped_search, httpserver_auth: HTTPServer +): + client = Client() + client.file_events.v2.search_groups(TEST_GROUPING_QUERY) + httpserver_auth.check() + + @pytest.mark.parametrize( "query, expected_query", [(TEST_EVENT_QUERY, TEST_DICT_QUERY)], @@ -845,11 +872,11 @@ def test_search_returns_expected_data(httpserver_auth: HTTPServer): ) client = Client() - query = EventQuery.construct(**TEST_DICT_QUERY) + query = EventQuery.model_construct(**TEST_DICT_QUERY) page = client.file_events.v2.search(query) assert isinstance(page, FileEventsPage) - assert page.file_events[0] == FileEventV2.parse_obj(TEST_EVENT_1) - assert page.file_events[1] == FileEventV2.parse_obj(TEST_EVENT_2) + assert page.file_events[0] == FileEventV2.model_validate(TEST_EVENT_1) + assert page.file_events[1] == 
FileEventV2.model_validate(TEST_EVENT_2) assert page.total_count == len(page.file_events) @@ -1323,3 +1350,160 @@ def test_cli_list_saved_searches_makes_expected_api_call( result = runner.invoke(incydr, ["file-events", "list-saved-searches"]) httpserver_auth.check() assert result.exit_code == 0 + + +def test_cli_search_groups_makes_expected_api_call( + httpserver_auth: HTTPServer, runner, mock_grouped_search +): + query = { + "groupClause": "AND", + "groups": [ + { + "filterClause": "AND", + "filters": [ + { + "term": "@timestamp", + "operator": "ON_OR_AFTER", + "value": "2022-06-01T00:00:00.000Z", + }, + { + "term": "@timestamp", + "operator": "ON_OR_BEFORE", + "value": "2022-08-10T00:00:00.000Z", + }, + ], + }, + { + "filterClause": "AND", + "filters": [ + {"term": "event.action", "operator": "IS", "value": "file-created"} + ], + }, + { + "filterClause": "AND", + "filters": [ + {"term": "user.email", "operator": "IS", "value": "foo@bar.com"} + ], + }, + { + "filterClause": "AND", + "filters": [ + {"term": "file.hash.md5", "operator": "IS", "value": "foo"} + ], + }, + { + "filterClause": "AND", + "filters": [ + {"term": "file.hash.sha256", "operator": "IS", "value": "bar"} + ], + }, + { + "filterClause": "AND", + "filters": [ + { + "term": "source.category", + "operator": "IS", + "value": "Coding Tools", + } + ], + }, + { + "filterClause": "AND", + "filters": [ + { + "term": "destination.category", + "operator": "IS", + "value": "Web Hosting", + } + ], + }, + { + "filterClause": "AND", + "filters": [{"term": "file.name", "operator": "IS", "value": "baz"}], + }, + { + "filterClause": "AND", + "filters": [ + { + "term": "file.directory", + "operator": "IS", + "value": "C://foo/bar.txt", + } + ], + }, + { + "filterClause": "AND", + "filters": [ + {"term": "file.category", "operator": "IS", "value": "Document"} + ], + }, + { + "filterClause": "AND", + "filters": [ + { + "term": "risk.indicators.name", + "operator": "IS", + "value": "Bitbucket upload", + } + ], + }, + 
{ + "filterClause": "AND", + "filters": [ + {"term": "risk.severity", "operator": "IS", "value": "HIGH"} + ], + }, + { + "filterClause": "AND", + "filters": [ + {"term": "risk.score", "operator": "GREATER_THAN", "value": 10} + ], + }, + ], + "groupingTerm": "user.email", + "size": 10000, + } + + httpserver_auth.expect_request( + "/v2/file-events/grouping", method="POST", json=query + ).respond_with_json(json.loads(TEST_GROUPING_RESPONSE)) + + result = runner.invoke( + incydr, + [ + "file-events", + "search-groups", + "--start", + "2022-06-01", + "--end", + "2022-08-10", + "--event-action", + "file-created", + "--username", + "foo@bar.com", + "--md5", + "foo", + "--sha256", + "bar", + "--source-category", + "Coding Tools", + "--destination-category", + "Web Hosting", + "--file-name", + "baz", + "--file-directory", + "C://foo/bar.txt", + "--file-category", + "Document", + "--risk-indicator", + "Bitbucket upload", + "--risk-severity", + "HIGH", + "--risk-score", + "10", + "--group-by", + "user.email", + ], + ) + httpserver_auth.check() + assert result.exit_code == 0 From 0308b078901f6b406afbad5f2d88129bdf41e2ed Mon Sep 17 00:00:00 2001 From: Cecilia Stevens <63068179+ceciliastevens@users.noreply.github.com> Date: Tue, 17 Mar 2026 13:25:50 -0400 Subject: [PATCH 2/3] try unpinning hatch --- .github/workflows/build.yml | 2 +- .github/workflows/docs.yml | 2 +- .github/workflows/style.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 86b1e45d..c7e96b2b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,7 @@ jobs: - name: Install hatch run: | - pip install hatch==1.14.0 + pip install hatch pip install . 
- name: Run tests diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6933f368..d60cbdeb 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -23,6 +23,6 @@ jobs: - name: Install click run: pip install click==8.1.8 - name: Install hatch - run: pip install hatch==1.14.0 + run: pip install hatch - name: Build docs run: hatch run docs:build diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 8b4fafb5..ef06d525 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -25,7 +25,7 @@ jobs: run: pip install click===8.1.8 - name: Install hatch - run: pip install hatch==1.14.0 + run: pip install hatch - name: Run style checks run: hatch run style:check From ebb42cedb191450903d999003166c5956fcc43a9 Mon Sep 17 00:00:00 2001 From: Cecilia Stevens <63068179+ceciliastevens@users.noreply.github.com> Date: Thu, 19 Mar 2026 09:12:32 -0400 Subject: [PATCH 3/3] PR feedback --- src/_incydr_cli/cmds/file_events.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/_incydr_cli/cmds/file_events.py b/src/_incydr_cli/cmds/file_events.py index 804b6496..0825e419 100644 --- a/src/_incydr_cli/cmds/file_events.py +++ b/src/_incydr_cli/cmds/file_events.py @@ -246,12 +246,6 @@ def search_groups( client = Client() - if not group_by: - raise BadOptionUsage( - "group_by", - "--group-by option is required.", - ) - if saved_search: saved_search = client.file_events.v2.get_saved_search(saved_search) query = GroupingEventQuery.from_saved_search(saved_search) @@ -290,7 +284,7 @@ def search_groups( if output: logger = get_server_logger(output, certs, ignore_cert_validation) for group in groups: - logger.info(json.dumps(group)) + logger.info(json.dumps(group.dict())) return if format_ == TableFormat.csv: