Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ jobs:
matrix:
python-version: ${{ fromJSON(vars.CI_PYTHON_VERSIONS) }}
# build for the (last 5) major supported versions to ensure some degree of compatibility; only the latest version is actually recommended, linted and deployed
# due to csv-writing issues with versions below 3.12, versions before 3.12 are not supported


steps:
Expand All @@ -43,3 +42,8 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install .
pip install pytest

- name: Run tests
run: |
pytest src/cadenzaanalytics/tests/ -v
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added
- Support for measure aggregation types MEDIAN and STANDARD_DEVIATION_SAMPLE
- Added `cadenzaAnalyticsVersion` to capabilities responses to help identify issues when adding or using an extension in Cadenza

### Changed
- Improved type annotations for mappings and parameter values
- Uses custom CSV handling instead of pandas CSV handling to fix various edge cases
- Minimum required Python version reduced to 3.11
- Values of data type `ZONED_DATE_TIME` are now received as pandas Timestamps and normalized to UTC

### Fixed
- The `basic-extension` example enrichment now actually enriches the data

## 10.4.0 - 2025-12-05
### Added
Expand Down
4 changes: 1 addition & 3 deletions examples/enrichment/extension/example_extensions.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
"""Example module for running a disy Cadenza analytics extension that
will execute a basic enrichment."""

import pandas as pd

import cadenzaanalytics as ca


def enrichment_basic_analytics_function(request: ca.AnalyticsRequest):
# pylint: disable=unused-argument
df_data = pd.DataFrame()
df_data = request["table"].data
df_data["new_value"] = "value"

result_metadata = [
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,16 @@ classifiers = [
]

[tool.poetry.dependencies]
python = "^3.12"
python = "^3.10"
Flask = "3.1.2"
Werkzeug = "3.1.4"
Flask-Cors = "6.0.1"
requests-toolbelt = "1.0.0"
pandas = " ^2.0.2"
chardet = "5.2.0"
Shapely = "2.1.2"
pytest = "9.0.2"
tzlocal = "5.3.1"

[project]
name = "cadenzaanalytics"
Expand Down
65 changes: 28 additions & 37 deletions src/cadenzaanalytics/cadenza_analytics_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
invoked via HTTP POST on the relative path."""
import json
import logging
from io import StringIO
from datetime import datetime
from typing import Callable, List, Optional
from tzlocal import get_localzone_name

import numpy as np
import pandas as pd
from flask import Response, request
from shapely import from_wkt

from cadenzaanalytics.data.analytics_extension import AnalyticsExtension
from cadenzaanalytics.data.extension_type import ExtensionType
Expand All @@ -21,6 +19,7 @@
from cadenzaanalytics.request.request_metadata import RequestMetadata
from cadenzaanalytics.request.request_table import RequestTable
from cadenzaanalytics.response.extension_response import ExtensionResponse
from cadenzaanalytics.util.csv import from_cadenza_csv


logger = logging.getLogger('cadenzaanalytics')
Expand Down Expand Up @@ -151,58 +150,50 @@ def _get_request_data(self, multipart_request) -> AnalyticsRequest:
if len(metadata.columns) > 0:
has_data = True
type_mapping = {}
na_values_mapping = {}
datetime_columns = []
geometry_columns = []

for column in metadata.columns:
if column.data_type == DataType.ZONEDDATETIME:
datetime_columns.append(column.name)
# must be empty list, otherwise pd.read_csv interprets empty strings as NA which
# is rejected by the parse_dates mechanism before it reaches the _parse_datetime function
na_values_mapping[column.name] = []
elif column.data_type == DataType.STRING:
# only empty strings must be considered as NA
# unfortunately there does not seem to be a way to interpret empty quotes as empty string
# and unquoted as None
na_values_mapping[column.name] = ['']
else:
# pandas default list of NA values, mostly relevant for numeric columns
na_values_mapping[column.name] = ['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A',
'#N/A', 'N/A', 'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null',
'NaN', '-NaN', 'nan', '-nan', 'None', '']

if column.data_type == DataType.GEOMETRY:
elif column.data_type == DataType.GEOMETRY:
geometry_columns.append(column.name)

type_mapping[column.name] = column.data_type.pandas_type()

csv_data = StringIO(self._get_from_request(multipart_request, 'data'))
# read_csv cannot distinguish None from empty strings
df_data = pd.read_csv(
csv_data = self._get_from_request(multipart_request, 'data')
# Use custom parser that properly handles quoted vs unquoted values
df_data = from_cadenza_csv(
csv_data,
sep=';',
dtype=type_mapping,
parse_dates=datetime_columns,
date_format='ISO8601',
na_values=na_values_mapping,
keep_default_na=False,
type_mapping=type_mapping,
datetime_columns=datetime_columns,
geometry_columns=geometry_columns
)

# Parse WKT geometries into shapely geometry objects using vectorized from_wkt
for gcol in geometry_columns:
values = df_data[gcol].to_numpy()
# from_wkt handles None values; replace empty strings with None
values = np.where((values == '') | pd.isna(values), None, values)
df_data[gcol] = from_wkt(values, on_invalid='warn')

logger.debug('Received data:\n%s', df_data.head())
else:
has_data = False
df_data = None
logger.debug('Received request without data')

analytics_request = AnalyticsRequest(parameters, cadenza_version=request.headers.get("X-Disy-Cadenza-Version"))
# use the analytics extension server timezone as a default, assuming they usually
# run in the same timezone as the Cadenza server. Cadenza versions after 10.4 will provide
# these timezone headers
analytics_extension_region = get_localzone_name()
analytics_extension_current_offset = datetime.now().astimezone().strftime('%z')
analytics_extension_current_offset_formatted = analytics_extension_current_offset[:3] + ':'
analytics_extension_current_offset_formatted += analytics_extension_current_offset[3:5]
if len(analytics_extension_current_offset) > 5:
# optional seconds and milliseconds (a dot already separates milliseconds)
analytics_extension_current_offset_formatted += ":" + analytics_extension_current_offset[5:]

analytics_request = AnalyticsRequest(
parameters,
cadenza_version=request.headers.get("X-Disy-Cadenza-Version"),
cadenza_timezone_region=request.headers.get("X-Disy-Cadenza-Timezone-Region",
default=analytics_extension_region),
cadenza_timezone_current_offset=request.headers.get("X-Disy-Cadenza-Timezone-Current-Offset",
default=analytics_extension_current_offset_formatted))
if has_data:
analytics_request[self._table_name] = RequestTable(df_data, metadata)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,5 +135,5 @@ def _list_extensions(self) -> Response:
result_dict['extensions'].append({'relativePath': extension.relative_path,
'extensionPrintName': extension.print_name,
'extensionType': extension.extension_type})

result_dict['cadenzaAnalyticsVersion'] = __version__
return Response(response=json.dumps(result_dict, default=str), status=200, mimetype="application/json")
5 changes: 4 additions & 1 deletion src/cadenzaanalytics/data/analytics_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from cadenzaanalytics.data.data_object import DataObject
from cadenzaanalytics.data.extension_type import ExtensionType
from cadenzaanalytics.data.parameter import Parameter
from cadenzaanalytics.version import __version__


class AnalyticsExtension(DataObject):
Expand All @@ -16,7 +17,8 @@ class AnalyticsExtension(DataObject):
"printName": "_print_name",
"extensionType": "_extension_type",
"attributeGroups": "_attribute_groups",
"parameters": "_parameters"
"parameters": "_parameters",
"cadenzaAnalyticsVersion": "_cadenza_analytics_version"
}

def __init__(self,
Expand All @@ -28,6 +30,7 @@ def __init__(self,
self._extension_type = extension_type
self._attribute_groups = attribute_groups
self._parameters = parameters
self._cadenza_analytics_version = __version__

@property
def print_name(self) -> str:
Expand Down
7 changes: 4 additions & 3 deletions src/cadenzaanalytics/data/parameter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import List, Any, Optional
from typing import List, Optional

from cadenzaanalytics.data.geometry_type import GeometryType
from cadenzaanalytics.data.data_object import DataObject
from cadenzaanalytics.data.parameter_type import ParameterType
from cadenzaanalytics.data.parameter_value_type import ParameterValueType


# pylint: disable=too-many-instance-attributes
Expand Down Expand Up @@ -31,7 +32,7 @@ def __init__(self, *,
geometry_types: Optional[List[GeometryType]] = None,
options: Optional[List[str]] = None,
required: bool = False,
default_value: Any = None,
default_value: Optional[ParameterValueType] = None,
requested_srs: Optional[str] = None) -> None:
"""Initialize a Parameter.

Expand All @@ -51,7 +52,7 @@ def __init__(self, *,
Whether the parameter is required, by default False.
For parameter type boolean, required=True makes submitting the value
True mandatory.
default_value : Any, optional
default_value : Optional[ParameterValueType], optional
Default value if the user doesn't provide one.
requested_srs : Optional[str], optional
Requested spatial reference system for GEOMETRY parameters.
Expand Down
9 changes: 5 additions & 4 deletions src/cadenzaanalytics/data/parameter_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cadenzaanalytics.data.data_type import DataType
from cadenzaanalytics.data.geometry_type import GeometryType
from cadenzaanalytics.data.data_object import DataObject
from cadenzaanalytics.data.parameter_value_type import ParameterValueType


class ParameterValue(DataObject):
Expand Down Expand Up @@ -92,12 +93,12 @@ def data_type(self) -> DataType:
return self._data_type

@property
def value(self) -> Any:
def value(self) -> Optional[ParameterValueType]:
"""Get the typed value of the parameter.

Returns
-------
Any
Optional[ParameterValueType]
The value of the parameter, typed according to the data type.
"""
return self._value
Expand Down Expand Up @@ -125,7 +126,7 @@ def srs(self) -> Optional[str]:
return self._srs


def _parse_value(self, value: Any, data_type: DataType) -> Any:
def _parse_value(self, value: Any, data_type: DataType) -> Optional[ParameterValueType]:
"""Parse and convert a parameter value according to its data type.

Parameters
Expand All @@ -137,7 +138,7 @@ def _parse_value(self, value: Any, data_type: DataType) -> Any:

Returns
-------
Any
Optional[ParameterValueType]
The parsed value with appropriate type, or None if input is None.
"""
if value is None:
Expand Down
9 changes: 9 additions & 0 deletions src/cadenzaanalytics/data/parameter_value_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

from datetime import datetime
from typing import Union

from shapely.geometry.base import BaseGeometry

# Type alias for the typed value of a parameter. A value is one of:
# a number (int or float), a string, a boolean, a datetime, or a shapely
# geometry (any BaseGeometry subclass). The alias itself does not include None.
ParameterValueType = Union[int, float, str, bool, datetime, BaseGeometry]
35 changes: 33 additions & 2 deletions src/cadenzaanalytics/request/analytics_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@
from cadenzaanalytics.request.request_table import RequestTable


class AnalyticsRequest(collections.abc.Mapping):
class AnalyticsRequest(collections.abc.Mapping[str, RequestTable]):
"""Represents an incoming analytics request from Cadenza.

Provides access to request parameters and data tables. Supports dict-like
access to tables via `request["table_name"]` syntax.
"""

def __init__(self, parameters: RequestParameter, cadenza_version: str) -> None:
def __init__(self,
parameters: RequestParameter,
cadenza_version: str,
cadenza_timezone_region: str,
cadenza_timezone_current_offset: str) -> None:
"""Initialize an AnalyticsRequest.

Parameters
Expand All @@ -21,10 +25,16 @@ def __init__(self, parameters: RequestParameter, cadenza_version: str) -> None:
The request parameters provided by Cadenza.
cadenza_version : str
Version string of the Cadenza instance sending the request.
cadenza_timezone_region : str
The timezone region (e.g. "Europe/Berlin") of the Cadenza instance sending the request.
cadenza_timezone_current_offset : str
The current timezone offset (e.g. "+01:00") of the Cadenza instance sending the request.
"""
self._parameters = parameters
self._tables = {}
self._cadenza_version = cadenza_version
self._cadenza_timezone_region = cadenza_timezone_region
self._cadenza_timezone_current_offset = cadenza_timezone_current_offset

def __getitem__(self, key: str) -> RequestTable:
"""Returns the request table object by name.
Expand Down Expand Up @@ -76,3 +86,24 @@ def cadenza_version(self) -> Optional[str]:
The Cadenza version string, or None if not provided.
"""
return self._cadenza_version

@property
def cadenza_timezone_region(self) -> str:
    """Get the timezone region of the Cadenza instance that sent the request.

    If (an older version of) Cadenza did not send a timezone region, this
    will be the region of this server.

    Returns
    -------
    str
        Region identifier, such as "Europe/Berlin".
    """
    return self._cadenza_timezone_region

@property
def cadenza_timezone_current_offset(self) -> str:
    """Get the current timezone offset of the Cadenza instance that sent the request.

    If (an older version of) Cadenza did not send a timezone offset, this
    will be the offset of this server.

    This value is purely informational and volatile, as it changes with
    daylight saving time. It should not be used to convert datetime objects
    to zone-aware datetimes; use the ``cadenza_timezone_region`` property
    for that.

    Returns
    -------
    str
        Offset string, such as "+01:00" or "Z".
    """
    return self._cadenza_timezone_current_offset
2 changes: 1 addition & 1 deletion src/cadenzaanalytics/request/request_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


# pylint: disable=protected-access
class RequestMetadata(collections.abc.Mapping):
class RequestMetadata(collections.abc.Mapping[str, ColumnMetadata]):
"""Metadata describing the columns in a request table.

Provides access to column metadata by name and groupings by attribute group.
Expand Down
Loading