From 7b3a95ed537ec4fe06a13faa9e66aed6c7b61a82 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Thu, 27 Feb 2025 14:04:12 -0800 Subject: [PATCH] Use C-accelerated safe_load for YAML We were generally already doing the right thing (calling load() with a CSafeLoader Loader), but this should make this a little more ergonomic (i.e., you just need to call safe_load() and not worry about passing in the right Loader to load()) --- .coveragerc | 2 ++ scripts/dump_service_configuration_yaml | 3 +- service_configuration_lib/__init__.py | 8 ++--- service_configuration_lib/spark_config.py | 4 +-- service_configuration_lib/utils.py | 3 +- service_configuration_lib/yaml_cached_view.py | 11 +++--- service_configuration_lib/yaml_tools.py | 34 +++++++++++++++++++ tests/spark_config_test.py | 2 +- 8 files changed, 48 insertions(+), 19 deletions(-) create mode 100644 service_configuration_lib/yaml_tools.py diff --git a/.coveragerc b/.coveragerc index eb185f9..bb3c871 100644 --- a/.coveragerc +++ b/.coveragerc @@ -7,6 +7,8 @@ omit = venv/* /usr/* setup.py + # this file has no real logic + service_configuration_lib/yaml_tools.py [report] show_missing = True diff --git a/scripts/dump_service_configuration_yaml b/scripts/dump_service_configuration_yaml index 1f70683..af438ba 100755 --- a/scripts/dump_service_configuration_yaml +++ b/scripts/dump_service_configuration_yaml @@ -14,9 +14,8 @@ # limitations under the License. import sys -import yaml - import service_configuration_lib +from service_configuration_lib import yaml_tools as yaml sys.path.append('/nail/sys/srv-deploy/lib/') print(yaml.dump(service_configuration_lib.read_services_configuration(), default_flow_style=False)) diff --git a/service_configuration_lib/__init__.py b/service_configuration_lib/__init__.py index 57684eb..3e82555 100644 --- a/service_configuration_lib/__init__.py +++ b/service_configuration_lib/__init__.py @@ -24,12 +24,8 @@ from typing import Mapping import ephemeral_port_reserve -import yaml -try: - from yaml.cyaml import CSafeLoader as Loader -except ImportError: # pragma: no cover (no libyaml-dev / pypy) - Loader = yaml.SafeLoader # type: ignore +from service_configuration_lib import yaml_tools as yaml DEFAULT_SOA_DIR = '/nail/etc/services' log = logging.getLogger(__name__) @@ -60,7 +56,7 @@ def read_port(port_file): def load_yaml(fd): - return yaml.load(fd, Loader=Loader) + return yaml.safe_load(fd) def read_monitoring(monitoring_file): diff --git a/service_configuration_lib/spark_config.py b/service_configuration_lib/spark_config.py index 8578277..4f44df3 100644 --- a/service_configuration_lib/spark_config.py +++ b/service_configuration_lib/spark_config.py @@ -17,10 +17,10 @@ import boto3 import requests -import yaml from boto3 import Session from service_configuration_lib import utils +from service_configuration_lib import yaml_tools as yaml from service_configuration_lib.text_colors import TextColors from service_configuration_lib.utils import EPHEMERAL_PORT_END from service_configuration_lib.utils import EPHEMERAL_PORT_START @@ -193,7 +193,7 @@ def assume_aws_role( """ try: with open(key_file) as creds_file: - creds_dict = yaml.load(creds_file.read(), Loader=yaml.SafeLoader) + creds_dict = yaml.safe_load(creds_file.read()) access_key = creds_dict['AccessKeyId'] secret_key = creds_dict['SecretAccessKey'] except (PermissionError, FileNotFoundError): diff --git a/service_configuration_lib/utils.py b/service_configuration_lib/utils.py index f8f7638..4e91432 100644 --- a/service_configuration_lib/utils.py +++ b/service_configuration_lib/utils.py @@ -15,9 +15,10 @@ from typing import Dict from typing import Tuple -import yaml from typing_extensions import Literal +from service_configuration_lib import yaml_tools as yaml + DEFAULT_SPARK_RUN_CONFIG = '/nail/srv/configs/spark.yaml' POD_TEMPLATE_PATH = '/nail/tmp/spark-pt-{file_uuid}.yaml' SPARK_EXECUTOR_POD_TEMPLATE = '/nail/srv/configs/spark_executor_pod_template.yaml' diff --git a/service_configuration_lib/yaml_cached_view.py b/service_configuration_lib/yaml_cached_view.py index cfa6dd1..8506e17 100644 --- a/service_configuration_lib/yaml_cached_view.py +++ b/service_configuration_lib/yaml_cached_view.py @@ -1,12 +1,9 @@ import logging from collections import defaultdict -import yaml -try: - from yaml import CSafeLoader as SafeLoader # type: ignore -except ImportError: # pragma: no cover - from yaml import SafeLoader # type: ignore +from yaml import YAMLError +from service_configuration_lib import yaml_tools as yaml from service_configuration_lib.cached_view import BaseCachedView log = logging.getLogger(__name__) @@ -38,10 +35,10 @@ def __init__(self): def add(self, path: str, service_name: str, config_name: str, config_suffix: str) -> None: try: with open(path, encoding='utf-8') as fd: - self.configs[service_name][config_name] = yaml.load(fd, Loader=SafeLoader) + self.configs[service_name][config_name] = yaml.safe_load(fd) except OSError as exn: log.warning(f'Error reading {path}: {exn}') - except yaml.YAMLError as exn: + except YAMLError as exn: log.warning(f'Error parsing {path}: {exn}') def remove(self, path: str, service_name: str, config_name: str, config_suffix: str) -> None: diff --git a/service_configuration_lib/yaml_tools.py b/service_configuration_lib/yaml_tools.py new file mode 100644 index 0000000..5f25666 --- /dev/null +++ b/service_configuration_lib/yaml_tools.py @@ -0,0 +1,34 @@ +import yaml + +try: + from yaml import CSafeLoader as Loader + from yaml import CSafeDumper as Dumper +except ImportError: # pragma: no cover + from yaml import SafeLoader as Loader # type: ignore + from yaml import SafeDumper as Dumper # type: ignore + + +def dump(*args, **kwargs): + kwargs['Dumper'] = Dumper + return yaml.dump(*args, **kwargs) + + +def dump_all(*args, **kwargs): + kwargs['Dumper'] = Dumper + return yaml.dump_all(*args, **kwargs) + + +def load(*args, **kwargs): + kwargs['Loader'] = Loader + return yaml.load(*args, **kwargs) + + +def load_all(*args, **kwargs): + kwargs['Loader'] = Loader + return yaml.load_all(*args, **kwargs) + + +safe_dump = dump +safe_dump_all = dump_all +safe_load = load +safe_load_all = load_all diff --git a/tests/spark_config_test.py b/tests/spark_config_test.py index f37b1f6..61e87bb 100644 --- a/tests/spark_config_test.py +++ b/tests/spark_config_test.py @@ -7,10 +7,10 @@ from unittest import mock import pytest -import yaml from service_configuration_lib import spark_config from service_configuration_lib import utils +from service_configuration_lib import yaml_tools as yaml TEST_ACCOUNT_ID = '123456789'