def save_telemetry_config_to_file(
    tier: "str | None" = None,
    project_token: "str | None" = None,
    api_endpoint: "str | None" = None,
    path: "Path | None" = None,
) -> None:
    """
    Save telemetry configuration as JSON in the existing config file.

    Only the values that are provided are written; other settings already
    present in the config file are preserved.

    Args:
        tier: Telemetry tier (off, internal, public)
        project_token: Project token for Tier 2
        api_endpoint: API endpoint for telemetry
        path: Path to config file (defaults to ~/.codecarbon.config)
    """
    import json

    p = path or Path.home() / ".codecarbon.config"

    # Read the existing config (if any) so other sections are not clobbered.
    config = configparser.ConfigParser()
    if p.exists():
        config.read(str(p))

    if "codecarbon" not in config.sections():
        config.add_section("codecarbon")

    # Build the JSON payload from the provided values only.
    telemetry_config = {}
    if tier:
        telemetry_config["telemetry_tier"] = tier
    if project_token:
        telemetry_config["telemetry_project_token"] = project_token
    if api_endpoint:
        telemetry_config["telemetry_api_endpoint"] = api_endpoint

    # Store the whole telemetry block as a single JSON string value.
    if telemetry_config:
        config["codecarbon"]["telemetry"] = json.dumps(telemetry_config)

    with p.open("w") as f:
        config.write(f)
    logger.info(f"Telemetry config saved to {p}")


def load_telemetry_config_from_file(path: "Path | None" = None) -> dict:
    """
    Load telemetry configuration from the existing config file.

    Args:
        path: Path to config file (defaults to ~/.codecarbon.config)

    Returns:
        Dictionary with telemetry configuration; empty when the file,
        section, or JSON value is missing or invalid.
    """
    import json

    p = path or Path.home() / ".codecarbon.config"

    # Guard clauses: any missing layer yields an empty config.
    if not p.exists():
        return {}

    config = configparser.ConfigParser()
    config.read(str(p))

    if "codecarbon" not in config.sections():
        return {}

    telemetry_str = config["codecarbon"].get("telemetry")
    if not telemetry_str:
        return {}

    try:
        return json.loads(telemetry_str)
    except json.JSONDecodeError:
        # A corrupted value is treated the same as no value at all.
        return {}
+ """ + from codecarbon.core.telemetry.config import ( + TelemetryTier, + get_telemetry_config, + set_telemetry_tier, + ) + from codecarbon.core.telemetry.config import ( + TELEMETRY_PROJECT_TOKEN_ENV_VAR, + TELEMETRY_API_ENDPOINT_ENV_VAR, + TELEMETRY_ENV_VAR, + ) + + print("\n=== CodeCarbon Telemetry Setup ===\n") + + # Show current config + config = get_telemetry_config() + print(f"Current tier: {config.tier.value}") + print(f"Current project token: {'set' if config.project_token else 'not set'}") + print(f"Current API endpoint: {config.api_endpoint or 'default'}") + + # Ask for tier + print("\nChoose telemetry tier:") + tier_choice = questionary.select( + "Telemetry tier:", + ["off", "internal", "public"], + default=config.tier.value, + ).ask() + + # Save tier preference + set_telemetry_tier(TelemetryTier(tier_choice), dont_ask_again=True) + print(f"\nTelemetry tier set to: {tier_choice}") + + # Ask for project token if public tier + if tier_choice == "public": + project_token = typer.prompt( + f"Project token (from {TELEMETRY_PROJECT_TOKEN_ENV_VAR} env var)", + default=config.project_token or "", + ) + if project_token: + print(f"\nTo enable Tier 2 (public) telemetry, set:") + print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}={project_token}") + + # Ask for API endpoint + api_endpoint = typer.prompt( + f"API endpoint (default: https://api.codecarbon.io)", + default=config.api_endpoint or "https://api.codecarbon.io", + ) + if api_endpoint and api_endpoint != "https://api.codecarbon.io": + print(f"\nTo use custom API endpoint, set:") + print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") + + print("\n=== Setup Complete ===") + print("\nEnvironment variables to configure:") + print(f" export {TELEMETRY_ENV_VAR}={tier_choice}") + if tier_choice == "public" and project_token: + print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=") + print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") + + +@telemetry.command("config", short_help="Show 
current telemetry configuration") +def telemetry_config(): + """ + Display current telemetry configuration. + """ + from codecarbon.core.telemetry.config import get_telemetry_config + + config = get_telemetry_config() + + print("\n=== Current Telemetry Configuration ===\n") + print(f"Tier: {config.tier.value}") + print(f"Enabled: {config.is_enabled}") + print(f"Project Token: {'configured' if config.project_token else 'not configured'}") + print(f"API Endpoint: {config.api_endpoint or 'default (https://api.codecarbon.io)'}") + print(f"First Run: {config.first_run}") + print(f"Has Consent: {config.has_consent}") + + if __name__ == "__main__": main() diff --git a/codecarbon/core/api_client.py b/codecarbon/core/api_client.py index 34067c71c..62495e738 100644 --- a/codecarbon/core/api_client.py +++ b/codecarbon/core/api_client.py @@ -353,6 +353,35 @@ def close_experiment(self): Tell the API that the experiment has ended. """ + def add_telemetry(self, telemetry_data: dict, api_key: str = None) -> bool: + """ + Send telemetry data to the /telemetry endpoint (Tier 1). 
+ + Args: + telemetry_data: Dictionary containing telemetry payload + api_key: Optional API key for authentication + + Returns: + True if successful, False otherwise + """ + try: + url = self.url + "/telemetry" + headers = self._get_headers() + + # Use provided api_key or fall back to instance api_key + if api_key: + headers["x-api-token"] = api_key + + r = requests.post(url=url, json=telemetry_data, timeout=5, headers=headers) + if r.status_code not in (200, 201): + self._log_error(url, telemetry_data, r) + return False + logger.debug(f"Telemetry data sent successfully to {url}") + return True + except Exception as e: + logger.error(f"Failed to send telemetry data: {e}") + return False + class simple_utc(tzinfo): def tzname(self, **kwargs): diff --git a/codecarbon/core/telemetry/collector.py b/codecarbon/core/telemetry/collector.py new file mode 100644 index 000000000..8ca9741df --- /dev/null +++ b/codecarbon/core/telemetry/collector.py @@ -0,0 +1,545 @@ +""" +Telemetry data collector. + +Collects environment, hardware, usage, and ML ecosystem data. 
+""" + +import hashlib +import os +import platform +import sys +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + +from codecarbon._version import __version__ +from codecarbon.core.config import get_hierarchical_config +from codecarbon.external.logger import logger + + +@dataclass +class TelemetryData: + """Container for all telemetry data.""" + + # Environment & Hardware (Tier 1: Internal) + os: str = "" + python_version: str = "" + python_implementation: str = "" + python_executable_hash: str = "" + python_env_type: str = "" + codecarbon_version: str = "" + codecarbon_install_method: str = "" + + cpu_count: int = 0 + cpu_physical_count: int = 0 + cpu_model: str = "" + cpu_architecture: str = "" + + gpu_count: int = 0 + gpu_model: str = "" + gpu_driver_version: str = "" + gpu_memory_total_gb: float = 0.0 + + ram_total_size_gb: float = 0.0 + + cuda_version: str = "" + cudnn_version: str = "" + + cloud_provider: str = "" + cloud_region: str = "" + + # Usage Patterns (Tier 1: Internal) + tracking_mode: str = "" + api_mode: str = "" # offline, online + output_methods: list = field(default_factory=list) + hardware_tracked: list = field(default_factory=list) + measure_power_interval_secs: float = 15.0 + + # ML Ecosystem (Tier 1: Internal) + has_torch: bool = False + torch_version: str = "" + has_transformers: bool = False + transformers_version: str = "" + has_diffusers: bool = False + diffusers_version: str = "" + has_tensorflow: bool = False + tensorflow_version: str = "" + has_keras: bool = False + keras_version: str = "" + has_pytorch_lightning: bool = False + pytorch_lightning_version: str = "" + has_fastai: bool = False + fastai_version: str = "" + ml_framework_primary: str = "" + + # Performance & Errors (Tier 1: Internal) + hardware_detection_success: bool = True + rapl_available: bool = False + gpu_detection_method: str = "" + errors_encountered: list = field(default_factory=list) + tracking_overhead_percent: float = 0.0 + + # 
Context (Tier 1: Internal) + ide_used: str = "" + notebook_environment: str = "" + ci_environment: str = "" + python_package_manager: str = "" + container_runtime: str = "" + in_container: bool = False + + # Emissions Data (Tier 2: Public only) + total_emissions_kg: float = 0.0 + emissions_rate_kg_per_sec: float = 0.0 + energy_consumed_kwh: float = 0.0 + cpu_energy_kwh: float = 0.0 + gpu_energy_kwh: float = 0.0 + ram_energy_kwh: float = 0.0 + duration_seconds: float = 0.0 + cpu_utilization_avg: float = 0.0 + gpu_utilization_avg: float = 0.0 + ram_utilization_avg: float = 0.0 + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for export.""" + return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} + + +class TelemetryCollector: + """Collects telemetry data.""" + + def __init__(self): + self._data = TelemetryData() + + @property + def data(self) -> TelemetryData: + return self._data + + def collect_environment(self) -> "TelemetryCollector": + """Collect Python environment info.""" + self._data.python_version = platform.python_version() + self._data.python_implementation = platform.python_implementation() + + # Hash executable path for privacy + executable = sys.executable + if executable: + self._data.python_executable_hash = hashlib.sha256( + executable.encode() + ).hexdigest()[:16] + + # Detect environment type + self._data.python_env_type = self._detect_python_env_type() + + # CodeCarbon version + self._data.codecarbon_version = __version__ + + # Install method detection + self._data.codecarbon_install_method = self._detect_install_method() + + # OS + self._data.os = platform.platform() + + # Architecture + self._data.cpu_architecture = platform.machine() + + return self + + def _detect_python_env_type(self) -> str: + """Detect Python environment type.""" + if "conda" in sys.prefix.lower(): + return "conda" + elif hasattr(sys, "real_prefix") or ( + hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix + ): + # Check for 
common venv patterns + if os.environ.get("VIRTUAL_ENV"): + return "venv" + # Check for uv + if os.environ.get("UV"): + return "uv" + return "virtualenv" + elif os.environ.get("VIRTUAL_ENV"): + return "venv" + elif os.environ.get("UV"): + return "uv" + return "system" + + def _detect_install_method(self) -> str: + """Detect how CodeCarbon was installed.""" + # Check if editable install + import codecarbon + + codecarbon_path = os.path.dirname(codecarbon.__file__) + if ".egg-link" in codecarbon_path or ".editable" in codecarbon_path: + return "editable" + + # Check common package managers + # This is a heuristic - check if in common locations + if "site-packages" in codecarbon_path: + # Could be pip, uv, or conda + if "uv" in codecarbon_path: + return "uv" + elif "conda" in codecarbon_path: + return "conda" + return "pip" + return "unknown" + + def collect_hardware( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + ) -> "TelemetryCollector": + """Collect hardware info.""" + self._data.cpu_count = cpu_count + self._data.cpu_physical_count = cpu_physical_count + self._data.cpu_model = cpu_model + self._data.ram_total_size_gb = ram_total_gb + self._data.gpu_count = gpu_count + self._data.gpu_model = gpu_model + + # Try to detect CUDA + self._detect_cuda() + + # Try to detect GPU driver + self._detect_gpu_driver() + + return self + + def _detect_cuda(self) -> None: + """Detect CUDA version.""" + try: + import torch + + if hasattr(torch, "version") and torch.version: + self._data.cuda_version = str(torch.version.cuda) + if hasattr(torch.backends, "cudnn") and torch.backends.cudnn.is_available(): + self._data.cudnn_version = str(torch.backends.cudnn.version()) + except ImportError: + pass + + def _detect_gpu_driver(self) -> None: + """Detect GPU driver version.""" + try: + import subprocess + + result = subprocess.run( + ["nvidia-smi", "--query-gpu=driver_version", 
"--format=csv,noheader"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + self._data.gpu_driver_version = result.stdout.strip().split("\n")[0] + self._data.gpu_detection_method = "nvidia-smi" + + # Also get GPU memory + result = subprocess.run( + [ + "nvidia-smi", + "--query-gpu=memory.total", + "--format=csv,noheader,nounits", + ], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + mem_mb = result.stdout.strip().split("\n")[0] + self._data.gpu_memory_total_gb = float(mem_mb) / 1024 + except (FileNotFoundError, subprocess.TimeoutExpired, ValueError): + pass + + def collect_usage( + self, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + ) -> "TelemetryCollector": + """Collect usage patterns.""" + self._data.tracking_mode = tracking_mode + self._data.api_mode = api_mode + self._data.output_methods = output_methods or [] + self._data.hardware_tracked = hardware_tracked or [] + self._data.measure_power_interval_secs = measure_power_interval + + return self + + def collect_ml_ecosystem(self) -> "TelemetryCollector": + """Detect ML frameworks and libraries.""" + frameworks = [] + + # PyTorch + try: + import torch + + self._data.has_torch = True + self._data.torch_version = torch.__version__ + frameworks.append("pytorch") + except ImportError: + pass + + # Transformers + try: + import transformers + + self._data.has_transformers = True + self._data.transformers_version = transformers.__version__ + except ImportError: + pass + + # Diffusers + try: + import diffusers + + self._data.has_diffusers = True + self._data.diffusers_version = diffusers.__version__ + except ImportError: + pass + + # TensorFlow + try: + import tensorflow + + self._data.has_tensorflow = True + self._data.tensorflow_version = tensorflow.__version__ + frameworks.append("tensorflow") + except ImportError: + pass + + # 
Keras + try: + import keras + + self._data.has_keras = True + self._data.keras_version = keras.__version__ + except ImportError: + pass + + # PyTorch Lightning + try: + import pytorch_lightning + + self._data.has_pytorch_lightning = True + self._data.pytorch_lightning_version = pytorch_lightning.__version__ + except ImportError: + pass + + # FastAI + try: + import fastai + + self._data.has_fastai = True + self._data.fastai_version = fastai.__version__ + except ImportError: + pass + + # Primary framework + self._data.ml_framework_primary = frameworks[0] if frameworks else "" + + return self + + def collect_context(self) -> "TelemetryCollector": + """Collect development context (IDE, notebook, CI).""" + # Detect notebook + self._data.notebook_environment = self._detect_notebook() + + # Detect CI + self._data.ci_environment = self._detect_ci() + + # Detect container + self._detect_container() + + # Detect package manager + self._data.python_package_manager = self._detect_package_manager() + + return self + + def _detect_notebook(self) -> str: + """Detect notebook environment.""" + try: + # Check for Jupyter + import ipykernel + + return "jupyter" + except ImportError: + pass + + # Check environment variables common in cloud notebooks + if os.environ.get("COLAB_RELEASE_TAG"): + return "colab" + if os.environ.get("KAGGLE_URL_BASE"): + return "kaggle" + + return "none" + + def _detect_ci(self) -> str: + """Detect CI environment.""" + ci_vars = { + "GITHUB_ACTIONS": "github-actions", + "GITLAB_CI": "gitlab", + "JENKINS_URL": "jenkins", + "CIRCLECI": "circleci", + "TRAVIS": "travis", + "BUILDKITE": "buildkite", + "AWS_CODEBUILD": "codebuild", + } + + for var, name in ci_vars.items(): + if os.environ.get(var): + return name + + return "none" + + def _detect_container(self) -> None: + """Detect container runtime.""" + # Check for Docker + if os.path.exists("/.dockerenv"): + self._data.in_container = True + self._data.container_runtime = "docker" + return + + # Check for 
container environment variables + if os.environ.get("KUBERNETES_SERVICE_HOST"): + self._data.in_container = True + self._data.container_runtime = "kubernetes" + return + + # Check cgroup + try: + with open("/proc/1/cgroup", "r") as f: + content = f.read() + if "docker" in content or "containerd" in content: + self._data.in_container = True + self._data.container_runtime = "docker" + return + except FileNotFoundError: + pass + + self._data.in_container = False + self._data.container_runtime = "none" + + def _detect_package_manager(self) -> str: + """Detect Python package manager.""" + # Check for poetry + if os.path.exists("pyproject.toml"): + with open("pyproject.toml", "r") as f: + if "[tool.poetry]" in f.read(): + return "poetry" + + # Check for uv + if os.path.exists("uv.lock"): + return "uv" + + # Check for pipenv + if os.path.exists("Pipfile"): + return "pipenv" + + # Check for conda + if os.path.exists("environment.yml") or os.path.exists("environment.yaml"): + return "conda" + + return "pip" + + def collect_errors( + self, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + ) -> "TelemetryCollector": + """Collect error information.""" + self._data.rapl_available = rapl_available + self._data.hardware_detection_success = hardware_detection_success + self._data.errors_encountered = errors or [] + + return self + + def collect_emissions( + self, + total_emissions_kg: float = 0.0, + emissions_rate_kg_per_sec: float = 0.0, + energy_consumed_kwh: float = 0.0, + cpu_energy_kwh: float = 0.0, + gpu_energy_kwh: float = 0.0, + ram_energy_kwh: float = 0.0, + duration_seconds: float = 0.0, + cpu_utilization_avg: float = 0.0, + gpu_utilization_avg: float = 0.0, + ram_utilization_avg: float = 0.0, + ) -> "TelemetryCollector": + """Collect emissions data (Tier 2: Public).""" + self._data.total_emissions_kg = total_emissions_kg + self._data.emissions_rate_kg_per_sec = emissions_rate_kg_per_sec + self._data.energy_consumed_kwh = 
energy_consumed_kwh + self._data.cpu_energy_kwh = cpu_energy_kwh + self._data.gpu_energy_kwh = gpu_energy_kwh + self._data.ram_energy_kwh = ram_energy_kwh + self._data.duration_seconds = duration_seconds + self._data.cpu_utilization_avg = cpu_utilization_avg + self._data.gpu_utilization_avg = gpu_utilization_avg + self._data.ram_utilization_avg = ram_utilization_avg + + return self + + def collect_cloud_info( + self, cloud_provider: str = "", cloud_region: str = "" + ) -> "TelemetryCollector": + """Collect cloud information.""" + self._data.cloud_provider = cloud_provider + self._data.cloud_region = cloud_region + + return self + + def collect_all( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + cloud_provider: str = "", + cloud_region: str = "", + ) -> TelemetryData: + """Collect all available telemetry data.""" + ( + self.collect_environment() + .collect_hardware( + cpu_count=cpu_count, + cpu_physical_count=cpu_physical_count, + cpu_model=cpu_model, + gpu_count=gpu_count, + gpu_model=gpu_model, + ram_total_gb=ram_total_gb, + ) + .collect_usage( + tracking_mode=tracking_mode, + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=hardware_tracked, + measure_power_interval=measure_power_interval, + ) + .collect_ml_ecosystem() + .collect_context() + .collect_errors( + rapl_available=rapl_available, + hardware_detection_success=hardware_detection_success, + errors=errors, + ) + .collect_cloud_info( + cloud_provider=cloud_provider, cloud_region=cloud_region + ) + ) + + return self._data diff --git a/codecarbon/core/telemetry/config.py b/codecarbon/core/telemetry/config.py new file 
"""
Telemetry configuration module.

Handles the 3-tier telemetry system:
- off: No telemetry
- internal: Private telemetry (helps CodeCarbon improve)
- public: Public telemetry (shares emissions for leaderboard)

For Tier 1 (internal): POST to /telemetry endpoint
For Tier 2 (public): Uses core public API with project token
"""

import json
import logging
import os
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Optional, Tuple

# NOTE(review): resolved by name so this module stays importable on its own;
# codecarbon.external.logger configures this same "codecarbon" logger.
logger = logging.getLogger("codecarbon")

# Environment variable name for telemetry setting
TELEMETRY_ENV_VAR = "CODECARBON_TELEMETRY"

# Environment variable for project token (Tier 2 / public)
TELEMETRY_PROJECT_TOKEN_ENV_VAR = "CODECARBON_TELEMETRY_PROJECT_TOKEN"

# Environment variable for API endpoint
TELEMETRY_API_ENDPOINT_ENV_VAR = "CODECARBON_TELEMETRY_API_ENDPOINT"

# Legacy OTEL endpoint env var (for backward compatibility)
OTEL_ENDPOINT_ENV_VAR = "CODECARBON_OTEL_ENDPOINT"

# Default API endpoint
DEFAULT_API_ENDPOINT = "https://api.codecarbon.io"


class TelemetryTier(str, Enum):
    """Telemetry tiers: off, internal (private), public (leaderboard)."""

    OFF = "off"
    INTERNAL = "internal"
    PUBLIC = "public"


@dataclass
class TelemetryConfig:
    """Resolved telemetry configuration.

    Fields mirror the environment variables / saved preference that produced
    them; ``otel_endpoint`` is kept for the legacy OTEL export path.
    """

    tier: TelemetryTier
    project_token: Optional[str]
    api_endpoint: Optional[str]
    has_consent: bool
    first_run: bool
    # Legacy OTEL support (still used for internal tier)
    otel_endpoint: Optional[str] = None

    @property
    def is_enabled(self) -> bool:
        """True unless the tier is ``off``."""
        return self.tier != TelemetryTier.OFF

    @property
    def is_public(self) -> bool:
        """True when emissions are shared publicly (Tier 2)."""
        return self.tier == TelemetryTier.PUBLIC

    @property
    def is_internal(self) -> bool:
        """True for private telemetry (Tier 1)."""
        return self.tier == TelemetryTier.INTERNAL


def get_user_config_dir() -> Path:
    """Return the per-user config directory for CodeCarbon."""
    # Imported lazily so this module does not hard-require appdirs at import.
    import appdirs

    return Path(appdirs.user_config_dir("codecarbon", "CodeCarbon"))


def get_telemetry_preference_file() -> Path:
    """Return the file used to persist the telemetry preference."""
    return get_user_config_dir() / "telemetry_preference.txt"


def save_telemetry_preference(tier: TelemetryTier, dont_ask_again: bool = False) -> None:
    """Persist the user's telemetry preference to disk."""
    config_dir = get_user_config_dir()
    config_dir.mkdir(parents=True, exist_ok=True)

    pref_file = get_telemetry_preference_file()
    content = f"{tier.value}\n"
    if dont_ask_again:
        content += "dont_ask_again\n"
    # fix: explicit encoding -- write_text otherwise uses the locale default.
    pref_file.write_text(content, encoding="utf-8")
    logger.info(f"Saved telemetry preference: {tier.value}")


def load_telemetry_preference() -> Optional[Tuple[TelemetryTier, bool]]:
    """Load the user's saved telemetry preference.

    Returns:
        Tuple of (tier, dont_ask_again) or None if not set or unparseable.
    """
    # fix: the builtin-generic annotation ``tuple[...]`` is evaluated at def
    # time and breaks on Python < 3.9; typing.Tuple is version-safe.
    pref_file = get_telemetry_preference_file()
    if not pref_file.exists():
        return None

    try:
        lines = pref_file.read_text(encoding="utf-8").strip().split("\n")
        tier = TelemetryTier(lines[0])
        dont_ask_again = len(lines) > 1 and "dont_ask_again" in lines[1]
        return (tier, dont_ask_again)
    except (ValueError, IndexError) as e:
        logger.debug(f"Could not parse telemetry preference: {e}")
        return None


def detect_tier_from_env() -> Optional[TelemetryTier]:
    """Read the telemetry tier from CODECARBON_TELEMETRY, if set and valid."""
    env_value = os.environ.get(TELEMETRY_ENV_VAR, "").lower().strip()
    if not env_value:
        return None

    try:
        return TelemetryTier(env_value)
    except ValueError:
        logger.warning(
            f"Invalid CODECARBON_TELEMETRY value: {env_value}. "
            f"Valid values: {', '.join(t.value for t in TelemetryTier)}"
        )
        return None
" + f"Valid values: {', '.join(t.value for t in TelemetryTier)}" + ) + return None + + +def get_otel_endpoint() -> Optional[str]: + """Get OTEL endpoint from environment or return None for default.""" + return os.environ.get(OTEL_ENDPOINT_ENV_VAR) + + +def get_telemetry_project_token() -> Optional[str]: + """Get telemetry project token from environment.""" + return os.environ.get(TELEMETRY_PROJECT_TOKEN_ENV_VAR) + + +def get_telemetry_api_endpoint() -> Optional[str]: + """Get telemetry API endpoint from environment.""" + return os.environ.get(TELEMETRY_API_ENDPOINT_ENV_VAR) + + +def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: + """ + Get the telemetry configuration. + + Priority order: + 1. Environment variable (CODECARBON_TELEMETRY) + 2. Saved user preference + 3. Default to internal (first run) - telemetry enabled by default + + Args: + force_first_run: Force treating this as first run (for testing) + + Returns: + TelemetryConfig object + """ + # Get common config values + project_token = get_telemetry_project_token() + api_endpoint = get_telemetry_api_endpoint() + otel_endpoint = get_otel_endpoint() + + # Check environment variable first + tier = detect_tier_from_env() + if tier is not None: + return TelemetryConfig( + tier=tier, + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, + has_consent=True, + first_run=False, + ) + + # Check saved preference + saved = load_telemetry_preference() + if saved is not None: + tier, dont_ask = saved + return TelemetryConfig( + tier=tier, + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, + has_consent=True, + first_run=False, + ) + + # First run - default to internal (telemetry enabled by default to help CodeCarbon improve) + return TelemetryConfig( + tier=TelemetryTier.INTERNAL, + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, + has_consent=True, + first_run=True, + ) + + +def 
def set_telemetry_tier(tier: TelemetryTier, dont_ask_again: bool = False) -> None:
    """Set and persist the telemetry tier."""
    save_telemetry_preference(tier, dont_ask_again)


# Try to import OpenTelemetry; the exporter degrades to a no-op without it.
try:
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
        OTLPSpanExporter as OTLPSpanExporterHTTP,
    )

    OTEL_AVAILABLE = True
except ImportError:
    OTEL_AVAILABLE = False
    logger.debug("OpenTelemetry not available, telemetry will not be exported")


class TelemetryExporter:
    """
    Exports telemetry data via OpenTelemetry.

    Supports both gRPC and HTTP exporters.
    """

    def __init__(self, config: TelemetryConfig):
        """
        Initialize the exporter.

        Args:
            config: Telemetry configuration
        """
        self._config = config
        self._tracer = None
        self._initialized = False

        if not OTEL_AVAILABLE:
            logger.warning(
                "OpenTelemetry not installed. "
                "Install with: pip install opentelemetry-api opentelemetry-sdk "
                "opentelemetry-exporter-otlp"
            )
            return

        if not config.is_enabled:
            logger.debug("Telemetry disabled, not initializing exporter")
            return

        self._initialize()

    def _initialize(self) -> None:
        """Initialize the OpenTelemetry tracer (idempotent)."""
        if self._initialized:
            return

        try:
            # fix: determine the endpoint BEFORE touching the global tracer
            # provider -- the original installed a fresh TracerProvider
            # (possibly clobbering the application's own) and only then
            # discovered there was no endpoint and bailed out.
            endpoint = self._config.otel_endpoint
            if not endpoint:
                logger.debug("No OTEL endpoint configured, skipping exporter init")
                return

            provider = TracerProvider()
            trace.set_tracer_provider(provider)

            # Choose HTTP or gRPC transport based on the endpoint scheme.
            if endpoint.startswith(("http://", "https://")):
                exporter = OTLPSpanExporterHTTP(endpoint=endpoint)
            else:
                exporter = OTLPSpanExporter(endpoint=endpoint)

            provider.add_span_processor(BatchSpanProcessor(exporter))

            self._tracer = trace.get_tracer(__name__)
            self._initialized = True
            logger.info(f"Telemetry exporter initialized with endpoint: {endpoint}")

        except Exception as e:
            # Telemetry is best-effort: initialization failures are logged only.
            logger.warning(f"Failed to initialize OpenTelemetry exporter: {e}")
            self._initialized = False
+ + Args: + data: The telemetry data to export + emissions_data: Optional emissions data (only for public tier) + + Returns: + True if export succeeded, False otherwise + """ + if not self._initialized or not self._tracer: + logger.debug("Telemetry exporter not initialized, skipping export") + return False + + if not self._config.is_enabled: + return False + + try: + with self._tracer.start_as_current_span("codecarbon.telemetry") as span: + # Add attributes based on tier + self._add_attributes(span, data) + + # For public tier, also add emissions data + if self._config.is_public and emissions_data: + self._add_emissions_attributes(span, emissions_data) + + logger.debug("Telemetry data exported successfully") + return True + + except Exception as e: + logger.warning(f"Failed to export telemetry: {e}") + return False + + def _add_attributes(self, span, data: TelemetryData) -> None: + """Add telemetry attributes to span.""" + # Environment & Hardware (always for internal/public) + if self._config.is_internal or self._config.is_public: + span.set_attribute("codecarbon.os", data.os) + span.set_attribute("codecarbon.python_version", data.python_version) + span.set_attribute("codecarbon.python_implementation", data.python_implementation) + span.set_attribute("codecarbon.python_env_type", data.python_env_type) + span.set_attribute("codecarbon.codecarbon_version", data.codecarbon_version) + span.set_attribute("codecarbon.codecarbon_install_method", data.codecarbon_install_method) + + # Hardware + span.set_attribute("codecarbon.cpu_count", data.cpu_count) + span.set_attribute("codecarbon.cpu_physical_count", data.cpu_physical_count) + span.set_attribute("codecarbon.cpu_model", data.cpu_model) + span.set_attribute("codecarbon.cpu_architecture", data.cpu_architecture) + span.set_attribute("codecarbon.gpu_count", data.gpu_count) + span.set_attribute("codecarbon.gpu_model", data.gpu_model) + span.set_attribute("codecarbon.ram_total_gb", data.ram_total_size_gb) + + # CUDA/GPU + 
if data.cuda_version: + span.set_attribute("codecarbon.cuda_version", data.cuda_version) + if data.gpu_driver_version: + span.set_attribute("codecarbon.gpu_driver_version", data.gpu_driver_version) + + # Usage patterns + span.set_attribute("codecarbon.tracking_mode", data.tracking_mode) + span.set_attribute("codecarbon.api_mode", data.api_mode) + span.set_attribute("codecarbon.hardware_tracked", ",".join(data.hardware_tracked)) + span.set_attribute("codecarbon.output_methods", ",".join(data.output_methods)) + span.set_attribute("codecarbon.measure_power_interval", data.measure_power_interval_secs) + + # ML Ecosystem + span.set_attribute("codecarbon.has_torch", data.has_torch) + span.set_attribute("codecarbon.torch_version", data.torch_version or "") + span.set_attribute("codecarbon.has_transformers", data.has_transformers) + span.set_attribute("codecarbon.has_diffusers", data.has_diffusers) + span.set_attribute("codecarbon.has_tensorflow", data.has_tensorflow) + span.set_attribute("codecarbon.has_keras", data.has_keras) + span.set_attribute("codecarbon.ml_framework_primary", data.ml_framework_primary) + + # Context + span.set_attribute("codecarbon.notebook_environment", data.notebook_environment) + span.set_attribute("codecarbon.ci_environment", data.ci_environment) + span.set_attribute("codecarbon.container_runtime", data.container_runtime) + span.set_attribute("codecarbon.in_container", data.in_container) + span.set_attribute("codecarbon.python_package_manager", data.python_package_manager) + + # Performance + span.set_attribute("codecarbon.hardware_detection_success", data.hardware_detection_success) + span.set_attribute("codecarbon.rapl_available", data.rapl_available) + span.set_attribute("codecarbon.gpu_detection_method", data.gpu_detection_method) + + # Cloud + span.set_attribute("codecarbon.cloud_provider", data.cloud_provider) + span.set_attribute("codecarbon.cloud_region", data.cloud_region) + + def _add_emissions_attributes(self, span, data: 
TelemetryData) -> None: + """Add emissions attributes to span (public tier only).""" + # Emissions data - shared publicly + span.set_attribute("codecarbon.emissions_kg", data.total_emissions_kg) + span.set_attribute("codecarbon.emissions_rate_kg_per_sec", data.emissions_rate_kg_per_sec) + span.set_attribute("codecarbon.energy_consumed_kwh", data.energy_consumed_kwh) + span.set_attribute("codecarbon.cpu_energy_kwh", data.cpu_energy_kwh) + span.set_attribute("codecarbon.gpu_energy_kwh", data.gpu_energy_kwh) + span.set_attribute("codecarbon.ram_energy_kwh", data.ram_energy_kwh) + span.set_attribute("codecarbon.duration_seconds", data.duration_seconds) + span.set_attribute("codecarbon.cpu_utilization_avg", data.cpu_utilization_avg) + span.set_attribute("codecarbon.gpu_utilization_avg", data.gpu_utilization_avg) + span.set_attribute("codecarbon.ram_utilization_avg", data.ram_utilization_avg) + + +def create_exporter(config: TelemetryConfig) -> Optional[TelemetryExporter]: + """ + Create a telemetry exporter based on config. + + Args: + config: Telemetry configuration + + Returns: + TelemetryExporter instance or None if not available + """ + if not OTEL_AVAILABLE: + return None + + if not config.is_enabled: + return None + + return TelemetryExporter(config) diff --git a/codecarbon/core/telemetry/prompt.py b/codecarbon/core/telemetry/prompt.py new file mode 100644 index 000000000..0a61950d1 --- /dev/null +++ b/codecarbon/core/telemetry/prompt.py @@ -0,0 +1,169 @@ +""" +First-run prompt for telemetry consent. + +Shows an interactive prompt to let users choose their telemetry level. 
+""" + +from typing import Optional + +from codecarbon.core.telemetry.config import ( + TelemetryTier, + get_telemetry_config, + save_telemetry_preference, +) +from codecarbon.external.logger import logger + +# Try to import rich/questionary for interactive prompts +# Falls back to simple input if not available +try: + from rich.console import Console + from rich.prompt import Prompt + + RICH_AVAILABLE = True +except ImportError: + RICH_AVAILABLE = False + +try: + import questionary + + QUESTIONARY_AVAILABLE = True +except ImportError: + QUESTIONARY_AVAILABLE = False + + +console = Console() if RICH_AVAILABLE else None + + +def prompt_for_telemetry_consent() -> Optional[TelemetryTier]: + """ + Prompt user for telemetry consent on first run. + + Returns: + The chosen TelemetryTier, or None if prompt should not be shown. + """ + config = get_telemetry_config() + + # Don't prompt if consent already given via env var or saved preference + if config.has_consent: + return config.tier + + # Check if we should prompt (first run without saved preference) + if not config.first_run: + return config.tier + + # Try interactive prompt, but don't fail if not available + if QUESTIONARY_AVAILABLE: + return _prompt_interactive_questionary() + elif RICH_AVAILABLE: + return _prompt_interactive_rich() + else: + return _prompt_simple() + + +def _prompt_interactive_questionary() -> Optional[TelemetryTier]: + """Prompt using questionary library.""" + try: + answer = questionary.select( + "šŸ“Š CodeCarbon Telemetry\n\n" + "Help improve CodeCarbon by sharing anonymous usage data?\n", + choices=[ + "Internal - Basic environment info (PRIVATE)", + "Public - Full telemetry (SHARED PUBLICLY on leaderboard)", + "Off - No telemetry", + ], + default="Internal - Basic environment info (PRIVATE)", + ).ask() + + if answer is None: + return TelemetryTier.OFF + + if "Internal" in answer: + return TelemetryTier.INTERNAL + elif "Public" in answer: + return TelemetryTier.PUBLIC + else: + return 
TelemetryTier.OFF + except Exception as e: + logger.debug(f"Questionary prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_interactive_rich() -> Optional[TelemetryTier]: + """Prompt using rich library.""" + if console is None: + return TelemetryTier.OFF + + try: + console.print("\nšŸ“Š [bold]CodeCarbon Telemetry[/bold]\n") + console.print( + "Help improve CodeCarbon by sharing anonymous usage data?\n" + ) + console.print(" [1] Internal - Basic environment info (PRIVATE)") + console.print(" • Python version, OS, CPU/GPU hardware") + console.print(" • Usage patterns, ML frameworks") + console.print(" • Helps us improve the library") + console.print() + console.print(" [2] Public - Full telemetry (SHARED PUBLICLY)") + console.print(" • All of internal + emissions data") + console.print(" • Shown on public leaderboard") + console.print() + console.print(" [3] Off - No telemetry") + console.print() + + answer = Prompt.ask( + "Select option", + choices=["1", "2", "3"], + default="1", + ) + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Rich prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_simple() -> Optional[TelemetryTier]: + """Simple input-based prompt.""" + try: + print("\nšŸ“Š CodeCarbon Telemetry") + print("=" * 40) + print("Help improve CodeCarbon by sharing anonymous usage data?") + print() + print(" 1) Internal - Basic environment info (PRIVATE)") + print(" 2) Public - Full telemetry (SHARED PUBLICLY)") + print(" 3) Off - No telemetry") + print() + answer = input("Select option [1]: ").strip() or "1" + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Simple prompt failed: {e}") + return TelemetryTier.OFF + + +def prompt_and_save() -> TelemetryTier: + """ + Prompt user and 
save their choice. + + Returns: + The chosen TelemetryTier. + """ + tier = prompt_for_telemetry_consent() + + if tier is None: + tier = TelemetryTier.OFF + + # Save the preference (don't ask again) + save_telemetry_preference(tier, dont_ask_again=True) + + return tier diff --git a/codecarbon/core/telemetry/service.py b/codecarbon/core/telemetry/service.py new file mode 100644 index 000000000..8d0554ad1 --- /dev/null +++ b/codecarbon/core/telemetry/service.py @@ -0,0 +1,221 @@ +""" +Telemetry service - integrates telemetry with CodeCarbon. + +This module provides functions to initialize and use telemetry. +""" + +from typing import Optional + +from codecarbon.core.telemetry.collector import TelemetryCollector, TelemetryData +from codecarbon.core.telemetry.config import ( + TelemetryConfig, + TelemetryTier, + get_telemetry_config, + set_telemetry_tier, +) +from codecarbon.core.telemetry.otel_exporter import TelemetryExporter, create_exporter +from codecarbon.core.telemetry.prompt import prompt_for_telemetry_consent +from codecarbon.external.logger import logger + + +class TelemetryService: + """Service for managing telemetry.""" + + _instance: Optional["TelemetryService"] = None + _initialized: bool = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + if self._initialized: + return + self._config: Optional[TelemetryConfig] = None + self._exporter: Optional[TelemetryExporter] = None + self._collector: Optional[TelemetryCollector] = None + self._initialized = True + + def initialize(self, force_prompt: bool = False) -> TelemetryConfig: + """ + Initialize telemetry service. 
+ + Args: + force_prompt: Force showing the consent prompt + + Returns: + TelemetryConfig + """ + # Get configuration + self._config = get_telemetry_config() + + # If first run and not forced, try to prompt + if self._config.first_run and not self._config.has_consent: + if force_prompt: + # This will show prompt if needed + pass + + # Create exporter if enabled + if self._config.is_enabled: + self._exporter = create_exporter(self._config) + self._collector = TelemetryCollector() + + logger.info( + f"Telemetry initialized: tier={self._config.tier.value}, " + f"enabled={self._config.is_enabled}" + ) + + return self._config + + def get_config(self) -> Optional[TelemetryConfig]: + """Get current telemetry config.""" + return self._config + + def collect_and_export( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + cloud_provider: str = "", + cloud_region: str = "", + ) -> bool: + """ + Collect and export telemetry data. 
+ + Returns: + True if successful, False otherwise + """ + if not self._config or not self._config.is_enabled: + return False + + if not self._collector or not self._exporter: + return False + + try: + # Collect data + data = self._collector.collect_all( + cpu_count=cpu_count, + cpu_physical_count=cpu_physical_count, + cpu_model=cpu_model, + gpu_count=gpu_count, + gpu_model=gpu_model, + ram_total_gb=ram_total_gb, + tracking_mode=tracking_mode, + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=hardware_tracked, + measure_power_interval=measure_power_interval, + rapl_available=rapl_available, + hardware_detection_success=hardware_detection_success, + errors=errors, + cloud_provider=cloud_provider, + cloud_region=cloud_region, + ) + + # Export + return self._exporter.export_telemetry(data) + + except Exception as e: + logger.warning(f"Failed to collect/export telemetry: {e}") + return False + + def export_emissions( + self, + total_emissions_kg: float = 0.0, + emissions_rate_kg_per_sec: float = 0.0, + energy_consumed_kwh: float = 0.0, + cpu_energy_kwh: float = 0.0, + gpu_energy_kwh: float = 0.0, + ram_energy_kwh: float = 0.0, + duration_seconds: float = 0.0, + cpu_utilization_avg: float = 0.0, + gpu_utilization_avg: float = 0.0, + ram_utilization_avg: float = 0.0, + ) -> bool: + """ + Export emissions data (only for public tier). 
+ + Returns: + True if successful, False otherwise + """ + if not self._config or not self._config.is_public: + return False + + if not self._collector or not self._exporter: + return False + + try: + # Collect emissions data + data = TelemetryData() + data.collect_emissions( + total_emissions_kg=total_emissions_kg, + emissions_rate_kg_per_sec=emissions_rate_kg_per_sec, + energy_consumed_kwh=energy_consumed_kwh, + cpu_energy_kwh=cpu_energy_kwh, + gpu_energy_kwh=gpu_energy_kwh, + ram_energy_kwh=ram_energy_kwh, + duration_seconds=duration_seconds, + cpu_utilization_avg=cpu_utilization_avg, + gpu_utilization_avg=gpu_utilization_avg, + ram_utilization_avg=ram_utilization_avg, + ) + + # Export + return self._exporter.export_telemetry(data) + + except Exception as e: + logger.warning(f"Failed to export emissions telemetry: {e}") + return False + + +# Global instance +_telemetry_service: Optional[TelemetryService] = None + + +def get_telemetry_service() -> TelemetryService: + """Get the global telemetry service instance.""" + global _telemetry_service + if _telemetry_service is None: + _telemetry_service = TelemetryService() + return _telemetry_service + + +def init_telemetry(force_prompt: bool = False) -> TelemetryConfig: + """ + Initialize telemetry. + + Args: + force_prompt: Force showing consent prompt + + Returns: + TelemetryConfig + """ + service = get_telemetry_service() + return service.initialize(force_prompt=force_prompt) + + +def set_telemetry(tier: str, dont_ask_again: bool = True) -> None: + """ + Set telemetry tier programmatically. 
+ + Args: + tier: "off", "internal", or "public" + dont_ask_again: Don't ask again in future + """ + try: + tier_enum = TelemetryTier(tier) + set_telemetry_tier(tier_enum, dont_ask_again=dont_ask_again) + except ValueError: + logger.warning(f"Invalid telemetry tier: {tier}") diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index a070ea56c..64b00adb5 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -756,8 +756,38 @@ def stop(self) -> Optional[float]: for handler in self._output_handlers: handler.exit() + # Log telemetry configuration warning + self._log_telemetry_warning() + return emissions_data.emissions + def _log_telemetry_warning(self) -> None: + """ + Log a warning about telemetry configuration at the end of each run. + """ + from codecarbon.core.telemetry.config import ( + TELEMETRY_ENV_VAR, + TELEMETRY_PROJECT_TOKEN_ENV_VAR, + get_telemetry_config, + ) + + config = get_telemetry_config() + + if not config.is_enabled: + logger.warning( + f"Telemetry is disabled. To enable, run: codecarbon telemetry setup\n" + f"Or set environment variable: export {TELEMETRY_ENV_VAR}=internal" + ) + elif config.is_public and not config.project_token: + logger.warning( + f"Telemetry is set to 'public' but no project token is configured.\n" + f"To configure Tier 2 (public) telemetry, run: codecarbon telemetry setup\n" + f"Or set: export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=" + ) + elif config.is_enabled and not config.first_run: + # Telemetry is properly configured + logger.debug(f"Telemetry enabled: tier={config.tier.value}") + def _persist_data( self, total_emissions: EmissionsData, diff --git a/docs/telemetry.md b/docs/telemetry.md new file mode 100644 index 000000000..f1c1b3dc5 --- /dev/null +++ b/docs/telemetry.md @@ -0,0 +1,112 @@ +# Telemetry + +CodeCarbon collects anonymous usage data to help improve the library. This page explains what we collect, how we handle your data, and how you can control it. 
+ +## Telemetry Tiers + +CodeCarbon supports three telemetry levels: + +| Tier | Env Variable | Description | +|------|-------------|-------------| +| Off | `CODECARBON_TELEMETRY=off` | No telemetry collected | +| Internal | `CODECARBON_TELEMETRY=internal` | Private usage data (helps us improve CodeCarbon) | +| Public | `CODECARBON_TELEMETRY=public` | Full telemetry including emissions (shared on public leaderboard) | + +## What We Collect + +### Internal (Private) + +When you enable Internal telemetry, we collect: + +- **Environment**: Python version, OS, CodeCarbon version, installation method +- **Hardware**: CPU model/count, GPU model/count, RAM, CUDA version +- **Usage Patterns**: Tracking mode, output methods configured, hardware tracked +- **ML Ecosystem**: Detected frameworks (PyTorch, TensorFlow, Transformers, etc.) +- **Context**: Notebook environment, CI/CD detection, container runtime +- **Performance**: Hardware detection success, RAPL availability, errors + +### Public (Leaderboard) + +When you enable Public telemetry, everything above **plus**: + +- **Emissions Data**: Total CO2 emissions, energy consumed, duration +- **Utilization**: CPU, GPU, RAM utilization averages + +This data is shared publicly on the CodeCarbon leaderboard to encourage green computing practices. 
+
+## Privacy
+
+We're committed to protecting your privacy:
+
+- **No PII**: We don't collect personally identifiable information
+- **Anonymized**: Machine identifiers are hashed
+- **GPS Precision**: Geographic coordinates are rounded to ~10km
+- **GDPR Compliant**: We support opt-in consent and data deletion requests
+- **Minimal Data**: We only collect what's needed to improve the library
+
+## Configuration
+
+### Environment Variables
+
+```bash
+# Set telemetry tier
+export CODECARBON_TELEMETRY=internal
+
+# Set custom OTEL endpoint (optional)
+export CODECARBON_OTEL_ENDPOINT=https://your-otel-endpoint.com/v1/traces
+```
+
+### In Code
+
+```python
+from codecarbon import EmissionsTracker, set_telemetry
+
+# Telemetry can also be set programmatically
+set_telemetry("internal")  # or "public" or "off"
+
+# Then create your tracker as usual
+tracker = EmissionsTracker(project_name="my-project")
+```
+
+## First-Run Prompt
+
+On first run, CodeCarbon will prompt you to choose your telemetry level if:
+
+- No `CODECARBON_TELEMETRY` environment variable is set
+- No previous preference was saved
+
+You can skip the prompt by setting the environment variable before running CodeCarbon.
+
+## Disabling Telemetry
+
+To completely disable telemetry:
+
+```bash
+export CODECARBON_TELEMETRY=off
+```
+
+Or in your code:
+
+```python
+set_telemetry("off")
+```
+
+## OTEL Integration
+
+Telemetry data is sent via OpenTelemetry (OTEL).
To use your own OTEL collector: + +```bash +export CODECARBON_OTEL_ENDPOINT=https://your-collector:4318/v1/traces +``` + +Install the OTEL extras if you want to export telemetry: + +```bash +pip install codecarbon[telemetry] +``` + +## Data Retention + +- Internal telemetry: Retained for 12 months +- Public leaderboard data: Displayed indefinitely +- You can request data deletion by contacting the CodeCarbon team diff --git a/mkdocs.yml b/mkdocs.yml index 4517f6b98..c6d4923e8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -128,4 +128,5 @@ nav: - Output: logging/output.md - Collecting emissions to a logger: logging/to_logger.md - Visualize: logging/visualize.md + - Telemetry: telemetry.md - Track GenAI API Calls (EcoLogits) ↗: https://ecologits.ai/latest/?utm_source=codecarbon&utm_medium=docs diff --git a/pyproject.toml b/pyproject.toml index 518acb7ed..d5a4169f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ + "appdirs", "arrow", "authlib>=1.2.1", "click", @@ -42,6 +43,13 @@ dependencies = [ "typer", ] +[project.optional-dependencies] +telemetry = [ + "opentelemetry-api", + "opentelemetry-sdk", + "opentelemetry-exporter-otlp", +] + [tool.setuptools.dynamic] version = {attr = "codecarbon._version.__version__"}