From 0c777ca40808043f3f2a9932413f8cdda09e4cd1 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Sun, 15 Mar 2026 20:55:34 +0100 Subject: [PATCH 1/2] feat(telemetry): introduce telemetry system for usage data collection - Added a new telemetry module to collect and export usage data. - Implemented three telemetry tiers: Off, Internal, and Public. - Integrated OpenTelemetry for data export. - Created user prompts for telemetry consent on first run. - Updated documentation to explain telemetry features and configuration. This enhancement aims to improve CodeCarbon by gathering anonymous usage data while ensuring user privacy and consent. --- codecarbon/__init__.py | 16 +- codecarbon/core/telemetry/collector.py | 545 +++++++++++++++++++++ codecarbon/core/telemetry/config.py | 175 +++++++ codecarbon/core/telemetry/otel_exporter.py | 223 +++++++++ codecarbon/core/telemetry/prompt.py | 169 +++++++ codecarbon/core/telemetry/service.py | 221 +++++++++ docs/telemetry.md | 112 +++++ mkdocs.yml | 1 + pyproject.toml | 8 + 9 files changed, 1469 insertions(+), 1 deletion(-) create mode 100644 codecarbon/core/telemetry/collector.py create mode 100644 codecarbon/core/telemetry/config.py create mode 100644 codecarbon/core/telemetry/otel_exporter.py create mode 100644 codecarbon/core/telemetry/prompt.py create mode 100644 codecarbon/core/telemetry/service.py create mode 100644 docs/telemetry.md diff --git a/codecarbon/__init__.py b/codecarbon/__init__.py index f602f2635..0d4db21d3 100644 --- a/codecarbon/__init__.py +++ b/codecarbon/__init__.py @@ -8,6 +8,20 @@ OfflineEmissionsTracker, track_emissions, ) +from .core.telemetry import ( + TelemetryConfig, + TelemetryTier, + init_telemetry, + set_telemetry, +) -__all__ = ["EmissionsTracker", "OfflineEmissionsTracker", "track_emissions"] +__all__ = [ + "EmissionsTracker", + "OfflineEmissionsTracker", + "track_emissions", + "TelemetryConfig", + "TelemetryTier", + "init_telemetry", + "set_telemetry", +] __app_name__ = "codecarbon" 
diff --git a/codecarbon/core/telemetry/collector.py b/codecarbon/core/telemetry/collector.py new file mode 100644 index 000000000..8ca9741df --- /dev/null +++ b/codecarbon/core/telemetry/collector.py @@ -0,0 +1,545 @@ +""" +Telemetry data collector. + +Collects environment, hardware, usage, and ML ecosystem data. +""" + +import hashlib +import os +import platform +import sys +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + +from codecarbon._version import __version__ +from codecarbon.core.config import get_hierarchical_config +from codecarbon.external.logger import logger + + +@dataclass +class TelemetryData: + """Container for all telemetry data.""" + + # Environment & Hardware (Tier 1: Internal) + os: str = "" + python_version: str = "" + python_implementation: str = "" + python_executable_hash: str = "" + python_env_type: str = "" + codecarbon_version: str = "" + codecarbon_install_method: str = "" + + cpu_count: int = 0 + cpu_physical_count: int = 0 + cpu_model: str = "" + cpu_architecture: str = "" + + gpu_count: int = 0 + gpu_model: str = "" + gpu_driver_version: str = "" + gpu_memory_total_gb: float = 0.0 + + ram_total_size_gb: float = 0.0 + + cuda_version: str = "" + cudnn_version: str = "" + + cloud_provider: str = "" + cloud_region: str = "" + + # Usage Patterns (Tier 1: Internal) + tracking_mode: str = "" + api_mode: str = "" # offline, online + output_methods: list = field(default_factory=list) + hardware_tracked: list = field(default_factory=list) + measure_power_interval_secs: float = 15.0 + + # ML Ecosystem (Tier 1: Internal) + has_torch: bool = False + torch_version: str = "" + has_transformers: bool = False + transformers_version: str = "" + has_diffusers: bool = False + diffusers_version: str = "" + has_tensorflow: bool = False + tensorflow_version: str = "" + has_keras: bool = False + keras_version: str = "" + has_pytorch_lightning: bool = False + pytorch_lightning_version: str = "" + has_fastai: bool = False + 
fastai_version: str = "" + ml_framework_primary: str = "" + + # Performance & Errors (Tier 1: Internal) + hardware_detection_success: bool = True + rapl_available: bool = False + gpu_detection_method: str = "" + errors_encountered: list = field(default_factory=list) + tracking_overhead_percent: float = 0.0 + + # Context (Tier 1: Internal) + ide_used: str = "" + notebook_environment: str = "" + ci_environment: str = "" + python_package_manager: str = "" + container_runtime: str = "" + in_container: bool = False + + # Emissions Data (Tier 2: Public only) + total_emissions_kg: float = 0.0 + emissions_rate_kg_per_sec: float = 0.0 + energy_consumed_kwh: float = 0.0 + cpu_energy_kwh: float = 0.0 + gpu_energy_kwh: float = 0.0 + ram_energy_kwh: float = 0.0 + duration_seconds: float = 0.0 + cpu_utilization_avg: float = 0.0 + gpu_utilization_avg: float = 0.0 + ram_utilization_avg: float = 0.0 + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for export.""" + return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} + + +class TelemetryCollector: + """Collects telemetry data.""" + + def __init__(self): + self._data = TelemetryData() + + @property + def data(self) -> TelemetryData: + return self._data + + def collect_environment(self) -> "TelemetryCollector": + """Collect Python environment info.""" + self._data.python_version = platform.python_version() + self._data.python_implementation = platform.python_implementation() + + # Hash executable path for privacy + executable = sys.executable + if executable: + self._data.python_executable_hash = hashlib.sha256( + executable.encode() + ).hexdigest()[:16] + + # Detect environment type + self._data.python_env_type = self._detect_python_env_type() + + # CodeCarbon version + self._data.codecarbon_version = __version__ + + # Install method detection + self._data.codecarbon_install_method = self._detect_install_method() + + # OS + self._data.os = platform.platform() + + # Architecture + 
self._data.cpu_architecture = platform.machine() + + return self + + def _detect_python_env_type(self) -> str: + """Detect Python environment type.""" + if "conda" in sys.prefix.lower(): + return "conda" + elif hasattr(sys, "real_prefix") or ( + hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix + ): + # Check for common venv patterns + if os.environ.get("VIRTUAL_ENV"): + return "venv" + # Check for uv + if os.environ.get("UV"): + return "uv" + return "virtualenv" + elif os.environ.get("VIRTUAL_ENV"): + return "venv" + elif os.environ.get("UV"): + return "uv" + return "system" + + def _detect_install_method(self) -> str: + """Detect how CodeCarbon was installed.""" + # Check if editable install + import codecarbon + + codecarbon_path = os.path.dirname(codecarbon.__file__) + if ".egg-link" in codecarbon_path or ".editable" in codecarbon_path: + return "editable" + + # Check common package managers + # This is a heuristic - check if in common locations + if "site-packages" in codecarbon_path: + # Could be pip, uv, or conda + if "uv" in codecarbon_path: + return "uv" + elif "conda" in codecarbon_path: + return "conda" + return "pip" + return "unknown" + + def collect_hardware( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + ) -> "TelemetryCollector": + """Collect hardware info.""" + self._data.cpu_count = cpu_count + self._data.cpu_physical_count = cpu_physical_count + self._data.cpu_model = cpu_model + self._data.ram_total_size_gb = ram_total_gb + self._data.gpu_count = gpu_count + self._data.gpu_model = gpu_model + + # Try to detect CUDA + self._detect_cuda() + + # Try to detect GPU driver + self._detect_gpu_driver() + + return self + + def _detect_cuda(self) -> None: + """Detect CUDA version.""" + try: + import torch + + if hasattr(torch, "version") and torch.version: + self._data.cuda_version = str(torch.version.cuda) + if 
hasattr(torch.backends, "cudnn") and torch.backends.cudnn.is_available():
+                self._data.cudnn_version = str(torch.backends.cudnn.version())
+        except ImportError:
+            pass
+
+    def _detect_gpu_driver(self) -> None:
+        """Detect GPU driver version and total memory via nvidia-smi (best effort)."""
+        try:
+            import subprocess
+
+            result = subprocess.run(
+                ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+            if result.returncode == 0:
+                self._data.gpu_driver_version = result.stdout.strip().split("\n")[0]
+                self._data.gpu_detection_method = "nvidia-smi"
+
+                # Also get GPU memory
+                result = subprocess.run(
+                    [
+                        "nvidia-smi",
+                        "--query-gpu=memory.total",
+                        "--format=csv,noheader,nounits",
+                    ],
+                    capture_output=True,
+                    text=True,
+                    timeout=5,
+                )
+                if result.returncode == 0:
+                    mem_mb = result.stdout.strip().split("\n")[0]
+                    self._data.gpu_memory_total_gb = float(mem_mb) / 1024
+        # OSError covers FileNotFoundError/PermissionError (nvidia-smi absent
+        # or not executable); subprocess.SubprocessError covers TimeoutExpired
+        # and friends. Telemetry is best-effort and must never crash the host
+        # application, so every expected failure mode is swallowed here.
+        except (OSError, subprocess.SubprocessError, ValueError):
+            pass
+
+    def collect_usage(
+        self,
+        tracking_mode: str = "machine",
+        api_mode: str = "online",
+        output_methods: list = None,
+        hardware_tracked: list = None,
+        measure_power_interval: float = 15.0,
+    ) -> "TelemetryCollector":
+        """Collect usage patterns."""
+        self._data.tracking_mode = tracking_mode
+        self._data.api_mode = api_mode
+        self._data.output_methods = output_methods or []
+        self._data.hardware_tracked = hardware_tracked or []
+        self._data.measure_power_interval_secs = measure_power_interval
+
+        return self
+
+    def collect_ml_ecosystem(self) -> "TelemetryCollector":
+        """Detect ML frameworks and libraries."""
+        frameworks = []
+
+        # PyTorch
+        try:
+            import torch
+
+            self._data.has_torch = True
+            self._data.torch_version = torch.__version__
+            frameworks.append("pytorch")
+        except ImportError:
+            pass
+
+        # Transformers
+        try:
+            import transformers
+
+            self._data.has_transformers = True
+            self._data.transformers_version = transformers.__version__
+        except ImportError:
+            pass
+
+        # Diffusers
+        try:
+ import diffusers + + self._data.has_diffusers = True + self._data.diffusers_version = diffusers.__version__ + except ImportError: + pass + + # TensorFlow + try: + import tensorflow + + self._data.has_tensorflow = True + self._data.tensorflow_version = tensorflow.__version__ + frameworks.append("tensorflow") + except ImportError: + pass + + # Keras + try: + import keras + + self._data.has_keras = True + self._data.keras_version = keras.__version__ + except ImportError: + pass + + # PyTorch Lightning + try: + import pytorch_lightning + + self._data.has_pytorch_lightning = True + self._data.pytorch_lightning_version = pytorch_lightning.__version__ + except ImportError: + pass + + # FastAI + try: + import fastai + + self._data.has_fastai = True + self._data.fastai_version = fastai.__version__ + except ImportError: + pass + + # Primary framework + self._data.ml_framework_primary = frameworks[0] if frameworks else "" + + return self + + def collect_context(self) -> "TelemetryCollector": + """Collect development context (IDE, notebook, CI).""" + # Detect notebook + self._data.notebook_environment = self._detect_notebook() + + # Detect CI + self._data.ci_environment = self._detect_ci() + + # Detect container + self._detect_container() + + # Detect package manager + self._data.python_package_manager = self._detect_package_manager() + + return self + + def _detect_notebook(self) -> str: + """Detect notebook environment.""" + try: + # Check for Jupyter + import ipykernel + + return "jupyter" + except ImportError: + pass + + # Check environment variables common in cloud notebooks + if os.environ.get("COLAB_RELEASE_TAG"): + return "colab" + if os.environ.get("KAGGLE_URL_BASE"): + return "kaggle" + + return "none" + + def _detect_ci(self) -> str: + """Detect CI environment.""" + ci_vars = { + "GITHUB_ACTIONS": "github-actions", + "GITLAB_CI": "gitlab", + "JENKINS_URL": "jenkins", + "CIRCLECI": "circleci", + "TRAVIS": "travis", + "BUILDKITE": "buildkite", + "AWS_CODEBUILD": 
"codebuild", + } + + for var, name in ci_vars.items(): + if os.environ.get(var): + return name + + return "none" + + def _detect_container(self) -> None: + """Detect container runtime.""" + # Check for Docker + if os.path.exists("/.dockerenv"): + self._data.in_container = True + self._data.container_runtime = "docker" + return + + # Check for container environment variables + if os.environ.get("KUBERNETES_SERVICE_HOST"): + self._data.in_container = True + self._data.container_runtime = "kubernetes" + return + + # Check cgroup + try: + with open("/proc/1/cgroup", "r") as f: + content = f.read() + if "docker" in content or "containerd" in content: + self._data.in_container = True + self._data.container_runtime = "docker" + return + except FileNotFoundError: + pass + + self._data.in_container = False + self._data.container_runtime = "none" + + def _detect_package_manager(self) -> str: + """Detect Python package manager.""" + # Check for poetry + if os.path.exists("pyproject.toml"): + with open("pyproject.toml", "r") as f: + if "[tool.poetry]" in f.read(): + return "poetry" + + # Check for uv + if os.path.exists("uv.lock"): + return "uv" + + # Check for pipenv + if os.path.exists("Pipfile"): + return "pipenv" + + # Check for conda + if os.path.exists("environment.yml") or os.path.exists("environment.yaml"): + return "conda" + + return "pip" + + def collect_errors( + self, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + ) -> "TelemetryCollector": + """Collect error information.""" + self._data.rapl_available = rapl_available + self._data.hardware_detection_success = hardware_detection_success + self._data.errors_encountered = errors or [] + + return self + + def collect_emissions( + self, + total_emissions_kg: float = 0.0, + emissions_rate_kg_per_sec: float = 0.0, + energy_consumed_kwh: float = 0.0, + cpu_energy_kwh: float = 0.0, + gpu_energy_kwh: float = 0.0, + ram_energy_kwh: float = 0.0, + duration_seconds: float = 
0.0, + cpu_utilization_avg: float = 0.0, + gpu_utilization_avg: float = 0.0, + ram_utilization_avg: float = 0.0, + ) -> "TelemetryCollector": + """Collect emissions data (Tier 2: Public).""" + self._data.total_emissions_kg = total_emissions_kg + self._data.emissions_rate_kg_per_sec = emissions_rate_kg_per_sec + self._data.energy_consumed_kwh = energy_consumed_kwh + self._data.cpu_energy_kwh = cpu_energy_kwh + self._data.gpu_energy_kwh = gpu_energy_kwh + self._data.ram_energy_kwh = ram_energy_kwh + self._data.duration_seconds = duration_seconds + self._data.cpu_utilization_avg = cpu_utilization_avg + self._data.gpu_utilization_avg = gpu_utilization_avg + self._data.ram_utilization_avg = ram_utilization_avg + + return self + + def collect_cloud_info( + self, cloud_provider: str = "", cloud_region: str = "" + ) -> "TelemetryCollector": + """Collect cloud information.""" + self._data.cloud_provider = cloud_provider + self._data.cloud_region = cloud_region + + return self + + def collect_all( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + cloud_provider: str = "", + cloud_region: str = "", + ) -> TelemetryData: + """Collect all available telemetry data.""" + ( + self.collect_environment() + .collect_hardware( + cpu_count=cpu_count, + cpu_physical_count=cpu_physical_count, + cpu_model=cpu_model, + gpu_count=gpu_count, + gpu_model=gpu_model, + ram_total_gb=ram_total_gb, + ) + .collect_usage( + tracking_mode=tracking_mode, + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=hardware_tracked, + measure_power_interval=measure_power_interval, + ) + .collect_ml_ecosystem() + 
.collect_context() + .collect_errors( + rapl_available=rapl_available, + hardware_detection_success=hardware_detection_success, + errors=errors, + ) + .collect_cloud_info( + cloud_provider=cloud_provider, cloud_region=cloud_region + ) + ) + + return self._data diff --git a/codecarbon/core/telemetry/config.py b/codecarbon/core/telemetry/config.py new file mode 100644 index 000000000..e6db7e414 --- /dev/null +++ b/codecarbon/core/telemetry/config.py @@ -0,0 +1,175 @@ +""" +Telemetry configuration module. + +Handles the 3-tier telemetry system: +- off: No telemetry +- internal: Private telemetry (helps CodeCarbon improve) +- public: Public telemetry (shares emissions for leaderboard) +""" + +import os +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Optional + +import appdirs + +from codecarbon.external.logger import logger + +# Environment variable name for telemetry setting +TELEMETRY_ENV_VAR = "CODECARBON_TELEMETRY" + +# Environment variable for OTEL endpoint +OTEL_ENDPOINT_ENV_VAR = "CODECARBON_OTEL_ENDPOINT" + +# Default OTEL endpoint (can be configured by CodeCarbon team) +DEFAULT_OTEL_ENDPOINT = "https://otlp.example.com/v1/traces" + + +class TelemetryTier(str, Enum): + """Telemetry tiers.""" + + OFF = "off" + INTERNAL = "internal" + PUBLIC = "public" + + +@dataclass +class TelemetryConfig: + """Telemetry configuration.""" + + tier: TelemetryTier + otel_endpoint: Optional[str] + has_consent: bool + first_run: bool + + @property + def is_enabled(self) -> bool: + """Check if telemetry is enabled.""" + return self.tier != TelemetryTier.OFF + + @property + def is_public(self) -> bool: + """Check if public telemetry (emissions shared).""" + return self.tier == TelemetryTier.PUBLIC + + @property + def is_internal(self) -> bool: + """Check if internal telemetry (private).""" + return self.tier == TelemetryTier.INTERNAL + + +def get_user_config_dir() -> Path: + """Get the user config directory.""" + return 
Path(appdirs.user_config_dir("codecarbon", "CodeCarbon")) + + +def get_telemetry_preference_file() -> Path: + """Get the file path for storing telemetry preference.""" + return get_user_config_dir() / "telemetry_preference.txt" + + +def save_telemetry_preference(tier: TelemetryTier, dont_ask_again: bool = False) -> None: + """Save user's telemetry preference.""" + config_dir = get_user_config_dir() + config_dir.mkdir(parents=True, exist_ok=True) + + pref_file = get_telemetry_preference_file() + content = f"{tier.value}\n" + if dont_ask_again: + content += "dont_ask_again\n" + pref_file.write_text(content) + logger.info(f"Saved telemetry preference: {tier.value}") + + +def load_telemetry_preference() -> Optional[tuple[TelemetryTier, bool]]: + """Load user's saved telemetry preference. + + Returns: + Tuple of (tier, dont_ask_again) or None if not set. + """ + pref_file = get_telemetry_preference_file() + if not pref_file.exists(): + return None + + try: + content = pref_file.read_text().strip() + lines = content.split("\n") + tier = TelemetryTier(lines[0]) + dont_ask_again = len(lines) > 1 and "dont_ask_again" in lines[1] + return (tier, dont_ask_again) + except (ValueError, IndexError) as e: + logger.debug(f"Could not parse telemetry preference: {e}") + return None + + +def detect_tier_from_env() -> Optional[TelemetryTier]: + """Detect telemetry tier from environment variable.""" + env_value = os.environ.get(TELEMETRY_ENV_VAR, "").lower().strip() + if not env_value: + return None + + try: + return TelemetryTier(env_value) + except ValueError: + logger.warning( + f"Invalid CODECARBON_TELEMETRY value: {env_value}. 
" + f"Valid values: {', '.join(t.value for t in TelemetryTier)}" + ) + return None + + +def get_otel_endpoint() -> Optional[str]: + """Get OTEL endpoint from environment or return None for default.""" + return os.environ.get(OTEL_ENDPOINT_ENV_VAR) + + +def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: + """ + Get the telemetry configuration. + + Priority order: + 1. Environment variable (CODECARBON_TELEMETRY) + 2. Saved user preference + 3. Default to internal (first run) - telemetry enabled by default + + Args: + force_first_run: Force treating this as first run (for testing) + + Returns: + TelemetryConfig object + """ + # Check environment variable first + tier = detect_tier_from_env() + if tier is not None: + return TelemetryConfig( + tier=tier, + otel_endpoint=get_otel_endpoint(), + has_consent=True, + first_run=False, + ) + + # Check saved preference + saved = load_telemetry_preference() + if saved is not None: + tier, dont_ask = saved + return TelemetryConfig( + tier=tier, + otel_endpoint=get_otel_endpoint(), + has_consent=True, + first_run=False, + ) + + # First run - default to internal (telemetry enabled by default to help CodeCarbon improve) + return TelemetryConfig( + tier=TelemetryTier.INTERNAL, + otel_endpoint=get_otel_endpoint(), + has_consent=True, + first_run=True, + ) + + +def set_telemetry_tier(tier: TelemetryTier, dont_ask_again: bool = False) -> None: + """Set the telemetry tier.""" + save_telemetry_preference(tier, dont_ask_again) diff --git a/codecarbon/core/telemetry/otel_exporter.py b/codecarbon/core/telemetry/otel_exporter.py new file mode 100644 index 000000000..a223b33c8 --- /dev/null +++ b/codecarbon/core/telemetry/otel_exporter.py @@ -0,0 +1,223 @@ +""" +OpenTelemetry exporter for CodeCarbon telemetry. + +Sends telemetry data to an OTEL-compatible endpoint. 
+""" + +from typing import Any, Dict, Optional + +from codecarbon.core.telemetry.collector import TelemetryData +from codecarbon.core.telemetry.config import TelemetryConfig, TelemetryTier +from codecarbon.external.logger import logger + +# Try to import OpenTelemetry +try: + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter + from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as OTLPSpanExporterHTTP + + OTEL_AVAILABLE = True +except ImportError: + OTEL_AVAILABLE = False + logger.debug("OpenTelemetry not available, telemetry will not be exported") + + +class TelemetryExporter: + """ + Exports telemetry data via OpenTelemetry. + + Supports both gRPC and HTTP exporters. + """ + + def __init__(self, config: TelemetryConfig): + """ + Initialize the exporter. + + Args: + config: Telemetry configuration + """ + self._config = config + self._tracer = None + self._initialized = False + + if not OTEL_AVAILABLE: + logger.warning( + "OpenTelemetry not installed. 
" + "Install with: pip install opentelemetry-api opentelemetry-sdk " + "opentelemetry-exporter-otlp" + ) + return + + if not config.is_enabled: + logger.debug("Telemetry disabled, not initializing exporter") + return + + self._initialize() + + def _initialize(self) -> None: + """Initialize OpenTelemetry tracer.""" + if self._initialized: + return + + try: + # Set up tracer provider + provider = TracerProvider() + trace.set_tracer_provider(provider) + + # Determine endpoint + endpoint = self._config.otel_endpoint + if not endpoint: + logger.debug("No OTEL endpoint configured, skipping exporter init") + return + + # Choose HTTP or gRPC based on endpoint + if endpoint.startswith("http://") or endpoint.startswith("https://"): + # HTTP exporter + exporter = OTLPSpanExporterHTTP(endpoint=endpoint) + else: + # Default to gRPC + exporter = OTLPSpanExporter(endpoint=endpoint) + + # Add batch processor + processor = BatchSpanProcessor(exporter) + provider.add_span_processor(processor) + + # Get tracer + self._tracer = trace.get_tracer(__name__) + self._initialized = True + logger.info(f"Telemetry exporter initialized with endpoint: {endpoint}") + + except Exception as e: + logger.warning(f"Failed to initialize OpenTelemetry exporter: {e}") + self._initialized = False + + def export_telemetry( + self, + data: TelemetryData, + emissions_data: Optional[TelemetryData] = None, + ) -> bool: + """ + Export telemetry data. 
+ + Args: + data: The telemetry data to export + emissions_data: Optional emissions data (only for public tier) + + Returns: + True if export succeeded, False otherwise + """ + if not self._initialized or not self._tracer: + logger.debug("Telemetry exporter not initialized, skipping export") + return False + + if not self._config.is_enabled: + return False + + try: + with self._tracer.start_as_current_span("codecarbon.telemetry") as span: + # Add attributes based on tier + self._add_attributes(span, data) + + # For public tier, also add emissions data + if self._config.is_public and emissions_data: + self._add_emissions_attributes(span, emissions_data) + + logger.debug("Telemetry data exported successfully") + return True + + except Exception as e: + logger.warning(f"Failed to export telemetry: {e}") + return False + + def _add_attributes(self, span, data: TelemetryData) -> None: + """Add telemetry attributes to span.""" + # Environment & Hardware (always for internal/public) + if self._config.is_internal or self._config.is_public: + span.set_attribute("codecarbon.os", data.os) + span.set_attribute("codecarbon.python_version", data.python_version) + span.set_attribute("codecarbon.python_implementation", data.python_implementation) + span.set_attribute("codecarbon.python_env_type", data.python_env_type) + span.set_attribute("codecarbon.codecarbon_version", data.codecarbon_version) + span.set_attribute("codecarbon.codecarbon_install_method", data.codecarbon_install_method) + + # Hardware + span.set_attribute("codecarbon.cpu_count", data.cpu_count) + span.set_attribute("codecarbon.cpu_physical_count", data.cpu_physical_count) + span.set_attribute("codecarbon.cpu_model", data.cpu_model) + span.set_attribute("codecarbon.cpu_architecture", data.cpu_architecture) + span.set_attribute("codecarbon.gpu_count", data.gpu_count) + span.set_attribute("codecarbon.gpu_model", data.gpu_model) + span.set_attribute("codecarbon.ram_total_gb", data.ram_total_size_gb) + + # CUDA/GPU + 
if data.cuda_version: + span.set_attribute("codecarbon.cuda_version", data.cuda_version) + if data.gpu_driver_version: + span.set_attribute("codecarbon.gpu_driver_version", data.gpu_driver_version) + + # Usage patterns + span.set_attribute("codecarbon.tracking_mode", data.tracking_mode) + span.set_attribute("codecarbon.api_mode", data.api_mode) + span.set_attribute("codecarbon.hardware_tracked", ",".join(data.hardware_tracked)) + span.set_attribute("codecarbon.output_methods", ",".join(data.output_methods)) + span.set_attribute("codecarbon.measure_power_interval", data.measure_power_interval_secs) + + # ML Ecosystem + span.set_attribute("codecarbon.has_torch", data.has_torch) + span.set_attribute("codecarbon.torch_version", data.torch_version or "") + span.set_attribute("codecarbon.has_transformers", data.has_transformers) + span.set_attribute("codecarbon.has_diffusers", data.has_diffusers) + span.set_attribute("codecarbon.has_tensorflow", data.has_tensorflow) + span.set_attribute("codecarbon.has_keras", data.has_keras) + span.set_attribute("codecarbon.ml_framework_primary", data.ml_framework_primary) + + # Context + span.set_attribute("codecarbon.notebook_environment", data.notebook_environment) + span.set_attribute("codecarbon.ci_environment", data.ci_environment) + span.set_attribute("codecarbon.container_runtime", data.container_runtime) + span.set_attribute("codecarbon.in_container", data.in_container) + span.set_attribute("codecarbon.python_package_manager", data.python_package_manager) + + # Performance + span.set_attribute("codecarbon.hardware_detection_success", data.hardware_detection_success) + span.set_attribute("codecarbon.rapl_available", data.rapl_available) + span.set_attribute("codecarbon.gpu_detection_method", data.gpu_detection_method) + + # Cloud + span.set_attribute("codecarbon.cloud_provider", data.cloud_provider) + span.set_attribute("codecarbon.cloud_region", data.cloud_region) + + def _add_emissions_attributes(self, span, data: 
TelemetryData) -> None: + """Add emissions attributes to span (public tier only).""" + # Emissions data - shared publicly + span.set_attribute("codecarbon.emissions_kg", data.total_emissions_kg) + span.set_attribute("codecarbon.emissions_rate_kg_per_sec", data.emissions_rate_kg_per_sec) + span.set_attribute("codecarbon.energy_consumed_kwh", data.energy_consumed_kwh) + span.set_attribute("codecarbon.cpu_energy_kwh", data.cpu_energy_kwh) + span.set_attribute("codecarbon.gpu_energy_kwh", data.gpu_energy_kwh) + span.set_attribute("codecarbon.ram_energy_kwh", data.ram_energy_kwh) + span.set_attribute("codecarbon.duration_seconds", data.duration_seconds) + span.set_attribute("codecarbon.cpu_utilization_avg", data.cpu_utilization_avg) + span.set_attribute("codecarbon.gpu_utilization_avg", data.gpu_utilization_avg) + span.set_attribute("codecarbon.ram_utilization_avg", data.ram_utilization_avg) + + +def create_exporter(config: TelemetryConfig) -> Optional[TelemetryExporter]: + """ + Create a telemetry exporter based on config. + + Args: + config: Telemetry configuration + + Returns: + TelemetryExporter instance or None if not available + """ + if not OTEL_AVAILABLE: + return None + + if not config.is_enabled: + return None + + return TelemetryExporter(config) diff --git a/codecarbon/core/telemetry/prompt.py b/codecarbon/core/telemetry/prompt.py new file mode 100644 index 000000000..0a61950d1 --- /dev/null +++ b/codecarbon/core/telemetry/prompt.py @@ -0,0 +1,169 @@ +""" +First-run prompt for telemetry consent. + +Shows an interactive prompt to let users choose their telemetry level. 
+""" + +from typing import Optional + +from codecarbon.core.telemetry.config import ( + TelemetryTier, + get_telemetry_config, + save_telemetry_preference, +) +from codecarbon.external.logger import logger + +# Try to import rich/questionary for interactive prompts +# Falls back to simple input if not available +try: + from rich.console import Console + from rich.prompt import Prompt + + RICH_AVAILABLE = True +except ImportError: + RICH_AVAILABLE = False + +try: + import questionary + + QUESTIONARY_AVAILABLE = True +except ImportError: + QUESTIONARY_AVAILABLE = False + + +console = Console() if RICH_AVAILABLE else None + + +def prompt_for_telemetry_consent() -> Optional[TelemetryTier]: + """ + Prompt user for telemetry consent on first run. + + Returns: + The chosen TelemetryTier, or None if prompt should not be shown. + """ + config = get_telemetry_config() + + # Don't prompt if consent already given via env var or saved preference + if config.has_consent: + return config.tier + + # Check if we should prompt (first run without saved preference) + if not config.first_run: + return config.tier + + # Try interactive prompt, but don't fail if not available + if QUESTIONARY_AVAILABLE: + return _prompt_interactive_questionary() + elif RICH_AVAILABLE: + return _prompt_interactive_rich() + else: + return _prompt_simple() + + +def _prompt_interactive_questionary() -> Optional[TelemetryTier]: + """Prompt using questionary library.""" + try: + answer = questionary.select( + "šŸ“Š CodeCarbon Telemetry\n\n" + "Help improve CodeCarbon by sharing anonymous usage data?\n", + choices=[ + "Internal - Basic environment info (PRIVATE)", + "Public - Full telemetry (SHARED PUBLICLY on leaderboard)", + "Off - No telemetry", + ], + default="Internal - Basic environment info (PRIVATE)", + ).ask() + + if answer is None: + return TelemetryTier.OFF + + if "Internal" in answer: + return TelemetryTier.INTERNAL + elif "Public" in answer: + return TelemetryTier.PUBLIC + else: + return 
TelemetryTier.OFF + except Exception as e: + logger.debug(f"Questionary prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_interactive_rich() -> Optional[TelemetryTier]: + """Prompt using rich library.""" + if console is None: + return TelemetryTier.OFF + + try: + console.print("\nšŸ“Š [bold]CodeCarbon Telemetry[/bold]\n") + console.print( + "Help improve CodeCarbon by sharing anonymous usage data?\n" + ) + console.print(" [1] Internal - Basic environment info (PRIVATE)") + console.print(" • Python version, OS, CPU/GPU hardware") + console.print(" • Usage patterns, ML frameworks") + console.print(" • Helps us improve the library") + console.print() + console.print(" [2] Public - Full telemetry (SHARED PUBLICLY)") + console.print(" • All of internal + emissions data") + console.print(" • Shown on public leaderboard") + console.print() + console.print(" [3] Off - No telemetry") + console.print() + + answer = Prompt.ask( + "Select option", + choices=["1", "2", "3"], + default="1", + ) + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Rich prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_simple() -> Optional[TelemetryTier]: + """Simple input-based prompt.""" + try: + print("\nšŸ“Š CodeCarbon Telemetry") + print("=" * 40) + print("Help improve CodeCarbon by sharing anonymous usage data?") + print() + print(" 1) Internal - Basic environment info (PRIVATE)") + print(" 2) Public - Full telemetry (SHARED PUBLICLY)") + print(" 3) Off - No telemetry") + print() + answer = input("Select option [1]: ").strip() or "1" + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Simple prompt failed: {e}") + return TelemetryTier.OFF + + +def prompt_and_save() -> TelemetryTier: + """ + Prompt user and 
save their choice. + + Returns: + The chosen TelemetryTier. + """ + tier = prompt_for_telemetry_consent() + + if tier is None: + tier = TelemetryTier.OFF + + # Save the preference (don't ask again) + save_telemetry_preference(tier, dont_ask_again=True) + + return tier diff --git a/codecarbon/core/telemetry/service.py b/codecarbon/core/telemetry/service.py new file mode 100644 index 000000000..8d0554ad1 --- /dev/null +++ b/codecarbon/core/telemetry/service.py @@ -0,0 +1,221 @@ +""" +Telemetry service - integrates telemetry with CodeCarbon. + +This module provides functions to initialize and use telemetry. +""" + +from typing import Optional + +from codecarbon.core.telemetry.collector import TelemetryCollector, TelemetryData +from codecarbon.core.telemetry.config import ( + TelemetryConfig, + TelemetryTier, + get_telemetry_config, + set_telemetry_tier, +) +from codecarbon.core.telemetry.otel_exporter import TelemetryExporter, create_exporter +from codecarbon.core.telemetry.prompt import prompt_for_telemetry_consent +from codecarbon.external.logger import logger + + +class TelemetryService: + """Service for managing telemetry.""" + + _instance: Optional["TelemetryService"] = None + _initialized: bool = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + if self._initialized: + return + self._config: Optional[TelemetryConfig] = None + self._exporter: Optional[TelemetryExporter] = None + self._collector: Optional[TelemetryCollector] = None + self._initialized = True + + def initialize(self, force_prompt: bool = False) -> TelemetryConfig: + """ + Initialize telemetry service. 
+ + Args: + force_prompt: Force showing the consent prompt + + Returns: + TelemetryConfig + """ + # Get configuration + self._config = get_telemetry_config() + + # If first run and not forced, try to prompt + if self._config.first_run and not self._config.has_consent: + if force_prompt: + # This will show prompt if needed + pass + + # Create exporter if enabled + if self._config.is_enabled: + self._exporter = create_exporter(self._config) + self._collector = TelemetryCollector() + + logger.info( + f"Telemetry initialized: tier={self._config.tier.value}, " + f"enabled={self._config.is_enabled}" + ) + + return self._config + + def get_config(self) -> Optional[TelemetryConfig]: + """Get current telemetry config.""" + return self._config + + def collect_and_export( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + cloud_provider: str = "", + cloud_region: str = "", + ) -> bool: + """ + Collect and export telemetry data. 
+ + Returns: + True if successful, False otherwise + """ + if not self._config or not self._config.is_enabled: + return False + + if not self._collector or not self._exporter: + return False + + try: + # Collect data + data = self._collector.collect_all( + cpu_count=cpu_count, + cpu_physical_count=cpu_physical_count, + cpu_model=cpu_model, + gpu_count=gpu_count, + gpu_model=gpu_model, + ram_total_gb=ram_total_gb, + tracking_mode=tracking_mode, + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=hardware_tracked, + measure_power_interval=measure_power_interval, + rapl_available=rapl_available, + hardware_detection_success=hardware_detection_success, + errors=errors, + cloud_provider=cloud_provider, + cloud_region=cloud_region, + ) + + # Export + return self._exporter.export_telemetry(data) + + except Exception as e: + logger.warning(f"Failed to collect/export telemetry: {e}") + return False + + def export_emissions( + self, + total_emissions_kg: float = 0.0, + emissions_rate_kg_per_sec: float = 0.0, + energy_consumed_kwh: float = 0.0, + cpu_energy_kwh: float = 0.0, + gpu_energy_kwh: float = 0.0, + ram_energy_kwh: float = 0.0, + duration_seconds: float = 0.0, + cpu_utilization_avg: float = 0.0, + gpu_utilization_avg: float = 0.0, + ram_utilization_avg: float = 0.0, + ) -> bool: + """ + Export emissions data (only for public tier). 
+ + Returns: + True if successful, False otherwise + """ + if not self._config or not self._config.is_public: + return False + + if not self._collector or not self._exporter: + return False + + try: + # Collect emissions data + data = TelemetryData() + data.collect_emissions( + total_emissions_kg=total_emissions_kg, + emissions_rate_kg_per_sec=emissions_rate_kg_per_sec, + energy_consumed_kwh=energy_consumed_kwh, + cpu_energy_kwh=cpu_energy_kwh, + gpu_energy_kwh=gpu_energy_kwh, + ram_energy_kwh=ram_energy_kwh, + duration_seconds=duration_seconds, + cpu_utilization_avg=cpu_utilization_avg, + gpu_utilization_avg=gpu_utilization_avg, + ram_utilization_avg=ram_utilization_avg, + ) + + # Export + return self._exporter.export_telemetry(data) + + except Exception as e: + logger.warning(f"Failed to export emissions telemetry: {e}") + return False + + +# Global instance +_telemetry_service: Optional[TelemetryService] = None + + +def get_telemetry_service() -> TelemetryService: + """Get the global telemetry service instance.""" + global _telemetry_service + if _telemetry_service is None: + _telemetry_service = TelemetryService() + return _telemetry_service + + +def init_telemetry(force_prompt: bool = False) -> TelemetryConfig: + """ + Initialize telemetry. + + Args: + force_prompt: Force showing consent prompt + + Returns: + TelemetryConfig + """ + service = get_telemetry_service() + return service.initialize(force_prompt=force_prompt) + + +def set_telemetry(tier: str, dont_ask_again: bool = True) -> None: + """ + Set telemetry tier programmatically. 
+ + Args: + tier: "off", "internal", or "public" + dont_ask_again: Don't ask again in future + """ + try: + tier_enum = TelemetryTier(tier) + set_telemetry_tier(tier_enum, dont_ask_again=dont_ask_again) + except ValueError: + logger.warning(f"Invalid telemetry tier: {tier}") diff --git a/docs/telemetry.md b/docs/telemetry.md new file mode 100644 index 000000000..f1c1b3dc5 --- /dev/null +++ b/docs/telemetry.md @@ -0,0 +1,112 @@ +# Telemetry + +CodeCarbon collects anonymous usage data to help improve the library. This page explains what we collect, how we handle your data, and how you can control it. + +## Telemetry Tiers + +CodeCarbon supports three telemetry levels: + +| Tier | Env Variable | Description | +|------|-------------|-------------| +| Off | `CODECARBON_TELEMETRY=off` | No telemetry collected | +| Internal | `CODECARBON_TELEMETRY=internal` | Private usage data (helps us improve CodeCarbon) | +| Public | `CODECARBON_TELEMETRY=public` | Full telemetry including emissions (shared on public leaderboard) | + +## What We Collect + +### Internal (Private) + +When you enable Internal telemetry, we collect: + +- **Environment**: Python version, OS, CodeCarbon version, installation method +- **Hardware**: CPU model/count, GPU model/count, RAM, CUDA version +- **Usage Patterns**: Tracking mode, output methods configured, hardware tracked +- **ML Ecosystem**: Detected frameworks (PyTorch, TensorFlow, Transformers, etc.) +- **Context**: Notebook environment, CI/CD detection, container runtime +- **Performance**: Hardware detection success, RAPL availability, errors + +### Public (Leaderboard) + +When you enable Public telemetry, everything above **plus**: + +- **Emissions Data**: Total CO2 emissions, energy consumed, duration +- **Utilization**: CPU, GPU, RAM utilization averages + +This data is shared publicly on the CodeCarbon leaderboard to encourage green computing practices. 
+
+## Privacy
+
+We're committed to protecting your privacy:
+
+- **No PII**: We don't collect personally identifiable information
+- **Anonymized**: Machine identifiers are hashed
+- **GPS Precision**: Geographic coordinates are rounded to ~10km
+- **GDPR Compliant**: We ask for your consent and support data deletion requests
+- **Minimal Data**: We only collect what's needed to improve the library
+
+## Configuration
+
+### Environment Variables
+
+```bash
+# Set telemetry tier
+export CODECARBON_TELEMETRY=internal
+
+# Set custom OTEL endpoint (optional)
+export CODECARBON_OTEL_ENDPOINT=https://your-otel-endpoint.com/v1/traces
+```
+
+### In Code
+
+```python
+from codecarbon import set_telemetry
+
+# Persist your telemetry preference programmatically
+set_telemetry("internal")  # or "public" or "off"
+```
+
+## First-Run Prompt
+
+On first run, CodeCarbon will prompt you to choose your telemetry level if:
+
+- No `CODECARBON_TELEMETRY` environment variable is set
+- No previous preference was saved
+
+You can skip the prompt by setting the environment variable before running CodeCarbon.
+
+## Disabling Telemetry
+
+To completely disable telemetry:
+
+```bash
+export CODECARBON_TELEMETRY=off
+```
+
+Or in your code:
+
+```python
+from codecarbon import set_telemetry
+
+set_telemetry("off")
+```
+
+## OTEL Integration
+
+Telemetry data is sent via OpenTelemetry (OTEL).
To use your own OTEL collector: + +```bash +export CODECARBON_OTEL_ENDPOINT=https://your-collector:4318/v1/traces +``` + +Install the OTEL extras if you want to export telemetry: + +```bash +pip install codecarbon[telemetry] +``` + +## Data Retention + +- Internal telemetry: Retained for 12 months +- Public leaderboard data: Displayed indefinitely +- You can request data deletion by contacting the CodeCarbon team diff --git a/mkdocs.yml b/mkdocs.yml index 4517f6b98..c6d4923e8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -128,4 +128,5 @@ nav: - Output: logging/output.md - Collecting emissions to a logger: logging/to_logger.md - Visualize: logging/visualize.md + - Telemetry: telemetry.md - Track GenAI API Calls (EcoLogits) ↗: https://ecologits.ai/latest/?utm_source=codecarbon&utm_medium=docs diff --git a/pyproject.toml b/pyproject.toml index 518acb7ed..d5a4169f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ + "appdirs", "arrow", "authlib>=1.2.1", "click", @@ -42,6 +43,13 @@ dependencies = [ "typer", ] +[project.optional-dependencies] +telemetry = [ + "opentelemetry-api", + "opentelemetry-sdk", + "opentelemetry-exporter-otlp", +] + [tool.setuptools.dynamic] version = {attr = "codecarbon._version.__version__"} From 5fcde060a657d1db6dfc1e350f76dee0547f3c98 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 17 Mar 2026 20:00:03 +0100 Subject: [PATCH 2/2] feat(telemetry): enhance telemetry configuration and management - Added functions to save and load telemetry configuration from a file. - Introduced a command-line interface for managing telemetry settings, including setup and current configuration display. - Implemented logging for telemetry configuration warnings at the end of each run. - Enhanced the API client to send telemetry data to the new endpoint. 
These changes improve user experience by providing better management of telemetry settings and ensuring proper configuration is communicated effectively. --- codecarbon/cli/cli_utils.py | 78 +++++++++++++++++++++++ codecarbon/cli/main.py | 95 +++++++++++++++++++++++++++++ codecarbon/core/api_client.py | 29 +++++++++ codecarbon/core/telemetry/config.py | 48 ++++++++++++--- codecarbon/emissions_tracker.py | 30 +++++++++ 5 files changed, 273 insertions(+), 7 deletions(-) diff --git a/codecarbon/cli/cli_utils.py b/codecarbon/cli/cli_utils.py index 4f3daf4c6..c578b58d6 100644 --- a/codecarbon/cli/cli_utils.py +++ b/codecarbon/cli/cli_utils.py @@ -108,3 +108,81 @@ def create_new_config_file(): f.write("[codecarbon]\n") typer.echo(f"Config file created at {file_path}") return file_path + + +def save_telemetry_config_to_file( + tier: str = None, + project_token: str = None, + api_endpoint: str = None, + path: Path = None +) -> None: + """ + Save telemetry configuration as JSON in the existing config file. 
+ + Args: + tier: Telemetry tier (off, internal, public) + project_token: Project token for Tier 2 + api_endpoint: API endpoint for telemetry + path: Path to config file (defaults to ~/.codecarbon.config) + """ + import json + + p = path or Path.home() / ".codecarbon.config" + + # Read existing config or create new + config = configparser.ConfigParser() + if p.exists(): + config.read(str(p)) + + if "codecarbon" not in config.sections(): + config.add_section("codecarbon") + + # Build JSON config for telemetry + telemetry_config = {} + if tier: + telemetry_config["telemetry_tier"] = tier + if project_token: + telemetry_config["telemetry_project_token"] = project_token + if api_endpoint: + telemetry_config["telemetry_api_endpoint"] = api_endpoint + + # Save as JSON string + if telemetry_config: + config["codecarbon"]["telemetry"] = json.dumps(telemetry_config) + + with p.open("w") as f: + config.write(f) + logger.info(f"Telemetry config saved to {p}") + + +def load_telemetry_config_from_file(path: Path = None) -> dict: + """ + Load telemetry configuration from the existing config file. 
+ + Args: + path: Path to config file (defaults to ~/.codecarbon.config) + + Returns: + Dictionary with telemetry configuration + """ + import json + + p = path or Path.home() / ".codecarbon.config" + + if not p.exists(): + return {} + + config = configparser.ConfigParser() + config.read(str(p)) + + if "codecarbon" not in config.sections(): + return {} + + telemetry_str = config["codecarbon"].get("telemetry") + if telemetry_str: + try: + return json.loads(telemetry_str) + except json.JSONDecodeError: + return {} + + return {} diff --git a/codecarbon/cli/main.py b/codecarbon/cli/main.py index 7fd097b45..2c279ef65 100644 --- a/codecarbon/cli/main.py +++ b/codecarbon/cli/main.py @@ -436,5 +436,100 @@ def questionary_prompt(prompt, list_options, default): return value +@codecarbon.command("telemetry", short_help="Configure CodeCarbon telemetry") +def telemetry(): + """ + Manage CodeCarbon telemetry settings. + + Use subcommands to configure or view telemetry settings: + - codecarbon telemetry setup : Interactive setup wizard + - codecarbon telemetry config : Show current configuration + - codecarbon telemetry enable : Enable telemetry + """ + print("CodeCarbon Telemetry") + print("Use --help for more information on subcommands.") + + +@telemetry.command("setup", short_help="Interactive telemetry setup wizard") +def telemetry_setup(): + """ + Interactive wizard to configure CodeCarbon telemetry. 
+ """ + from codecarbon.core.telemetry.config import ( + TelemetryTier, + get_telemetry_config, + set_telemetry_tier, + ) + from codecarbon.core.telemetry.config import ( + TELEMETRY_PROJECT_TOKEN_ENV_VAR, + TELEMETRY_API_ENDPOINT_ENV_VAR, + TELEMETRY_ENV_VAR, + ) + + print("\n=== CodeCarbon Telemetry Setup ===\n") + + # Show current config + config = get_telemetry_config() + print(f"Current tier: {config.tier.value}") + print(f"Current project token: {'set' if config.project_token else 'not set'}") + print(f"Current API endpoint: {config.api_endpoint or 'default'}") + + # Ask for tier + print("\nChoose telemetry tier:") + tier_choice = questionary.select( + "Telemetry tier:", + ["off", "internal", "public"], + default=config.tier.value, + ).ask() + + # Save tier preference + set_telemetry_tier(TelemetryTier(tier_choice), dont_ask_again=True) + print(f"\nTelemetry tier set to: {tier_choice}") + + # Ask for project token if public tier + if tier_choice == "public": + project_token = typer.prompt( + f"Project token (from {TELEMETRY_PROJECT_TOKEN_ENV_VAR} env var)", + default=config.project_token or "", + ) + if project_token: + print(f"\nTo enable Tier 2 (public) telemetry, set:") + print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}={project_token}") + + # Ask for API endpoint + api_endpoint = typer.prompt( + f"API endpoint (default: https://api.codecarbon.io)", + default=config.api_endpoint or "https://api.codecarbon.io", + ) + if api_endpoint and api_endpoint != "https://api.codecarbon.io": + print(f"\nTo use custom API endpoint, set:") + print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") + + print("\n=== Setup Complete ===") + print("\nEnvironment variables to configure:") + print(f" export {TELEMETRY_ENV_VAR}={tier_choice}") + if tier_choice == "public" and project_token: + print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=") + print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") + + +@telemetry.command("config", short_help="Show 
current telemetry configuration") +def telemetry_config(): + """ + Display current telemetry configuration. + """ + from codecarbon.core.telemetry.config import get_telemetry_config + + config = get_telemetry_config() + + print("\n=== Current Telemetry Configuration ===\n") + print(f"Tier: {config.tier.value}") + print(f"Enabled: {config.is_enabled}") + print(f"Project Token: {'configured' if config.project_token else 'not configured'}") + print(f"API Endpoint: {config.api_endpoint or 'default (https://api.codecarbon.io)'}") + print(f"First Run: {config.first_run}") + print(f"Has Consent: {config.has_consent}") + + if __name__ == "__main__": main() diff --git a/codecarbon/core/api_client.py b/codecarbon/core/api_client.py index 34067c71c..62495e738 100644 --- a/codecarbon/core/api_client.py +++ b/codecarbon/core/api_client.py @@ -353,6 +353,35 @@ def close_experiment(self): Tell the API that the experiment has ended. """ + def add_telemetry(self, telemetry_data: dict, api_key: str = None) -> bool: + """ + Send telemetry data to the /telemetry endpoint (Tier 1). 
+ + Args: + telemetry_data: Dictionary containing telemetry payload + api_key: Optional API key for authentication + + Returns: + True if successful, False otherwise + """ + try: + url = self.url + "/telemetry" + headers = self._get_headers() + + # Use provided api_key or fall back to instance api_key + if api_key: + headers["x-api-token"] = api_key + + r = requests.post(url=url, json=telemetry_data, timeout=5, headers=headers) + if r.status_code not in (200, 201): + self._log_error(url, telemetry_data, r) + return False + logger.debug(f"Telemetry data sent successfully to {url}") + return True + except Exception as e: + logger.error(f"Failed to send telemetry data: {e}") + return False + class simple_utc(tzinfo): def tzname(self, **kwargs): diff --git a/codecarbon/core/telemetry/config.py b/codecarbon/core/telemetry/config.py index e6db7e414..d130bf8cd 100644 --- a/codecarbon/core/telemetry/config.py +++ b/codecarbon/core/telemetry/config.py @@ -5,8 +5,12 @@ - off: No telemetry - internal: Private telemetry (helps CodeCarbon improve) - public: Public telemetry (shares emissions for leaderboard) + +For Tier 1 (internal): POST to /telemetry endpoint +For Tier 2 (public): Uses core public API with project token """ +import json import os from dataclasses import dataclass from enum import Enum @@ -20,11 +24,17 @@ # Environment variable name for telemetry setting TELEMETRY_ENV_VAR = "CODECARBON_TELEMETRY" -# Environment variable for OTEL endpoint +# Environment variable for project token (Tier 2 / public) +TELEMETRY_PROJECT_TOKEN_ENV_VAR = "CODECARBON_TELEMETRY_PROJECT_TOKEN" + +# Environment variable for API endpoint +TELEMETRY_API_ENDPOINT_ENV_VAR = "CODECARBON_TELEMETRY_API_ENDPOINT" + +# Legacy OTEL endpoint env var (for backward compatibility) OTEL_ENDPOINT_ENV_VAR = "CODECARBON_OTEL_ENDPOINT" -# Default OTEL endpoint (can be configured by CodeCarbon team) -DEFAULT_OTEL_ENDPOINT = "https://otlp.example.com/v1/traces" +# Default API endpoint +DEFAULT_API_ENDPOINT = 
"https://api.codecarbon.io" class TelemetryTier(str, Enum): @@ -40,9 +50,12 @@ class TelemetryConfig: """Telemetry configuration.""" tier: TelemetryTier - otel_endpoint: Optional[str] + project_token: Optional[str] + api_endpoint: Optional[str] has_consent: bool first_run: bool + # Legacy OTEL support (still used for internal tier) + otel_endpoint: Optional[str] = None @property def is_enabled(self) -> bool: @@ -125,6 +138,16 @@ def get_otel_endpoint() -> Optional[str]: return os.environ.get(OTEL_ENDPOINT_ENV_VAR) +def get_telemetry_project_token() -> Optional[str]: + """Get telemetry project token from environment.""" + return os.environ.get(TELEMETRY_PROJECT_TOKEN_ENV_VAR) + + +def get_telemetry_api_endpoint() -> Optional[str]: + """Get telemetry API endpoint from environment.""" + return os.environ.get(TELEMETRY_API_ENDPOINT_ENV_VAR) + + def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: """ Get the telemetry configuration. @@ -140,12 +163,19 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: Returns: TelemetryConfig object """ + # Get common config values + project_token = get_telemetry_project_token() + api_endpoint = get_telemetry_api_endpoint() + otel_endpoint = get_otel_endpoint() + # Check environment variable first tier = detect_tier_from_env() if tier is not None: return TelemetryConfig( tier=tier, - otel_endpoint=get_otel_endpoint(), + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, has_consent=True, first_run=False, ) @@ -156,7 +186,9 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: tier, dont_ask = saved return TelemetryConfig( tier=tier, - otel_endpoint=get_otel_endpoint(), + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, has_consent=True, first_run=False, ) @@ -164,7 +196,9 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: # First run - default to internal 
(telemetry enabled by default to help CodeCarbon improve) return TelemetryConfig( tier=TelemetryTier.INTERNAL, - otel_endpoint=get_otel_endpoint(), + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, has_consent=True, first_run=True, ) diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index a070ea56c..64b00adb5 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -756,8 +756,38 @@ def stop(self) -> Optional[float]: for handler in self._output_handlers: handler.exit() + # Log telemetry configuration warning + self._log_telemetry_warning() + return emissions_data.emissions + def _log_telemetry_warning(self) -> None: + """ + Log a warning about telemetry configuration at the end of each run. + """ + from codecarbon.core.telemetry.config import ( + TELEMETRY_ENV_VAR, + TELEMETRY_PROJECT_TOKEN_ENV_VAR, + get_telemetry_config, + ) + + config = get_telemetry_config() + + if not config.is_enabled: + logger.warning( + f"Telemetry is disabled. To enable, run: codecarbon telemetry setup\n" + f"Or set environment variable: export {TELEMETRY_ENV_VAR}=internal" + ) + elif config.is_public and not config.project_token: + logger.warning( + f"Telemetry is set to 'public' but no project token is configured.\n" + f"To configure Tier 2 (public) telemetry, run: codecarbon telemetry setup\n" + f"Or set: export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=" + ) + elif config.is_enabled and not config.first_run: + # Telemetry is properly configured + logger.debug(f"Telemetry enabled: tier={config.tier.value}") + def _persist_data( self, total_emissions: EmissionsData,