From bc1b0db7d8224de8d35a0660c957bc52e0667f2b Mon Sep 17 00:00:00 2001 From: Aditi Sah Date: Sun, 7 Dec 2025 15:22:06 +0530 Subject: [PATCH] fixed sending duplicate usage metrics --- .github/workflows/reusable-version-branch.yml | 36 ++++-- .../migrations/0002_add_metric_sent_model.py | 69 ++++++++++ openwisp_utils/metric_collection/models.py | 76 +++++++++++ .../metric_collection/tests/test_models.py | 121 +++++++++++++++++- 4 files changed, 286 insertions(+), 16 deletions(-) create mode 100644 openwisp_utils/metric_collection/migrations/0002_add_metric_sent_model.py diff --git a/.github/workflows/reusable-version-branch.yml b/.github/workflows/reusable-version-branch.yml index e14dd38a..31eae2d6 100644 --- a/.github/workflows/reusable-version-branch.yml +++ b/.github/workflows/reusable-version-branch.yml @@ -16,19 +16,26 @@ on: jobs: replicate: runs-on: ubuntu-latest + permissions: + # required for check-out and push + contents: write + # required for getting an authentication token + id-token: write steps: - name: Checkout repository - uses: actions/checkout@v6 + # Use actions/checkout@v4, the current major version + uses: actions/checkout@v4 + with: + # Fetch all history so rebase can work without a separate fetch step + fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v6 + uses: actions/setup-python@v5 with: python-version: "3.10" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip + # Cache pip dependencies for faster subsequent runs + cache: 'pip' - name: Install package if required if: ${{ inputs.install_package }} @@ -50,12 +57,13 @@ jobs: git config --global user.email 'github-actions[bot]@users.noreply.github.com' - name: Rebase changes onto version branch + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Pass the version from the previous step into the environment + VERSION: ${{ env.VERSION }} run: | - if git ls-remote --heads origin $VERSION | grep -sw $VERSION; then - git fetch origin --unshallow - git checkout -b $VERSION origin/$VERSION - git rebase origin/master - else - git checkout -b $VERSION - fi - git push origin $VERSION + # Create the version branch locally, tracking the remote if it exists + git branch $VERSION origin/$VERSION || true + git checkout $VERSION + git rebase origin/master + git push origin $VERSION --force-with-lease diff --git a/openwisp_utils/metric_collection/migrations/0002_add_metric_sent_model.py b/openwisp_utils/metric_collection/migrations/0002_add_metric_sent_model.py new file mode 100644 index 00000000..2bb4d4b7 --- /dev/null +++ b/openwisp_utils/metric_collection/migrations/0002_add_metric_sent_model.py @@ -0,0 +1,69 @@ +# Generated manually for adding MetricSent model + +from django.db import migrations, models +import django.utils.timezone +import model_utils.fields +import uuid + + +class Migration(migrations.Migration): + dependencies = [ + ("metric_collection", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="MetricSent", + fields=[ + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ( + "created", + model_utils.fields.AutoCreatedField( + default=django.utils.timezone.now, + editable=False, + verbose_name="created", + ), + ), + ( + "modified", + model_utils.fields.AutoLastModifiedField( + default=django.utils.timezone.now, + editable=False, + verbose_name="modified", + ), + ), + ( + "category", + models.CharField( + help_text="Type of metric (Install, Heartbeat, Upgrade, Consent Withdrawn)", + max_length=50, + ), + ), + ( + "metrics_hash", + models.CharField( + help_text="SHA-256 hash of the metrics payload", + max_length=64, + ), + ), + ( + "date", + models.DateField(help_text="Date when the metric was sent"), + ), + ], + options={ + "verbose_name": "Sent Metric", + "verbose_name_plural": "Sent Metrics", + "ordering": ("-created",), + "unique_together": {("category", "metrics_hash", "date")}, + }, + ), + ] \ No newline at end of file diff --git a/openwisp_utils/metric_collection/models.py b/openwisp_utils/metric_collection/models.py index 0e2e2714..5613e7ef 100644 --- a/openwisp_utils/metric_collection/models.py +++ b/openwisp_utils/metric_collection/models.py @@ -1,4 +1,7 @@ +import hashlib +import json import logging +from datetime import date from django.db import models from django.utils.translation import gettext_lazy as _ @@ -114,11 +117,29 @@ def send_usage_metrics(cls, category): category, {"Installation Method": get_openwisp_installation_method()} ) ) + + # Check if this exact payload has already been sent today + metrics_hash = get_payload_hash(metrics) + if MetricSent.objects.filter( + category=category, + metrics_hash=metrics_hash, + date=date.today() + ).exists(): + logger.info(f"Metrics already sent today for category={category}, skipping") + return + logger.info(f"Sending metrics, category={category}") post_metrics(metrics) logger.info(f"Metrics sent successfully, category={category}") logger.info(f"Metrics: {metrics}") + # Mark as sent + MetricSent.objects.create( + category=category, + metrics_hash=metrics_hash, + date=date.today() + ) + class Consent(TimeStampedEditableModel): """Stores consent to collect anonymous usage metrics. @@ -173,8 +194,63 @@ def update_consent_withdrawal(cls, sender, instance, **kwargs): logger.info("Consent withdrawn, sending final metric event") # Create a simple event for consent withdrawal events = get_events("Consent Withdrawn", {"Action": "Opt-out"}) + + # Check if this consent withdrawal has already been sent today + metrics_hash = get_payload_hash(events) + if MetricSent.objects.filter( + category="Consent Withdrawn", + metrics_hash=metrics_hash, + date=date.today() + ).exists(): + logger.info("Consent withdrawal metric already sent today, skipping") + return + post_metrics(events) logger.info("Consent withdrawal metric sent successfully") + + # Mark as sent + MetricSent.objects.create( + category="Consent Withdrawn", + metrics_hash=metrics_hash, + date=date.today() + ) except sender.DoesNotExist: # In case the instance doesn't exist in DB yet, just pass pass + + +class MetricSent(TimeStampedEditableModel): + """Tracks sent metrics to prevent duplicates. + + This model stores a hash of each metrics payload that has been sent, + along with the category and date, to ensure each unique payload is + sent at most once per day. + """ + + category = models.CharField( + max_length=50, + help_text="Type of metric (Install, Heartbeat, Upgrade, Consent Withdrawn)" + ) + metrics_hash = models.CharField( + max_length=64, + help_text="SHA-256 hash of the metrics payload" + ) + date = models.DateField( + help_text="Date when the metric was sent" + ) + + class Meta: + unique_together = ('category', 'metrics_hash', 'date') + ordering = ("-created",) + verbose_name = "Sent Metric" + verbose_name_plural = "Sent Metrics" + + +def get_payload_hash(events): + """Calculate SHA-256 hash of the events payload.""" + # Sort events to ensure consistent hashing regardless of order + sorted_events = sorted(events, key=lambda x: (x.get('category', ''), x.get('action', ''), x.get('name', ''))) + payload_str = json.dumps(sorted_events, sort_keys=True) + return hashlib.sha256(payload_str.encode('utf-8')).hexdigest() + + diff --git a/openwisp_utils/metric_collection/tests/test_models.py b/openwisp_utils/metric_collection/tests/test_models.py index 077f0f95..fc98318f 100644 --- a/openwisp_utils/metric_collection/tests/test_models.py +++ b/openwisp_utils/metric_collection/tests/test_models.py @@ -1,4 +1,4 @@ -from datetime import datetime, timezone +from datetime import datetime, timezone, date from unittest.mock import patch import requests @@ -11,7 +11,7 @@ from urllib3.response import HTTPResponse from .. import helper, models, tasks -from ..models import Consent, OpenwispVersion +from ..models import Consent, OpenwispVersion, MetricSent from . import ( _ENABLED_OPENWISP_MODULES_RETURN_VALUE, _HEARTBEAT_METRICS, @@ -369,3 +369,120 @@ def test_send_usage_metrics_user_opted_out(self, mocked_post_usage_metrics): Consent.objects.create(user_consented=False) tasks.send_usage_metrics.delay() mocked_post_usage_metrics.assert_not_called() + + +class TestSentMetric(TestCase): + def setUp(self): + # Clear MetricSent records + MetricSent.objects.all().delete() + + def test_get_payload_hash(self): + from ..models import get_payload_hash + events = [ + {"category": "Heartbeat", "action": "test", "name": "value", "value": 1, "times": 1, "period_start": 1234567890, "period_end": 1234567890} + ] + hash1 = get_payload_hash(events) + hash2 = get_payload_hash(events) + self.assertEqual(hash1, hash2) + self.assertEqual(len(hash1), 64) # SHA-256 hex length + + # Different events should have different hashes + events2 = [ + {"category": "Heartbeat", "action": "test", "name": "value2", "value": 1, "times": 1, "period_start": 1234567890, "period_end": 1234567890} + ] + hash3 = get_payload_hash(events2) + self.assertNotEqual(hash1, hash3) + + # Events with different timestamps should have the same hash + events3 = [ + {"category": "Heartbeat", "action": "test", "name": "value", "value": 1, "times": 1, "period_start": 1234567891, "period_end": 1234567891} + ] + hash4 = get_payload_hash(events3) + self.assertEqual(hash1, hash4) + + @freeze_time("2023-12-01") + def test_has_been_sent_today(self): + from ..models import get_payload_hash + events = [ + {"category": "Heartbeat", "action": "test", "name": "value", "value": 1, "times": 1, "period_start": 1234567890, "period_end": 1234567890} + ] + metrics_hash = get_payload_hash(events) + + # Should not exist initially + self.assertFalse(MetricSent.objects.filter( + category="Heartbeat", + metrics_hash=metrics_hash, + date=date.today() + ).exists()) + + # Create record + MetricSent.objects.create( + category="Heartbeat", + metrics_hash=metrics_hash, + date=date.today() + ) + + # Should exist now + self.assertTrue(MetricSent.objects.filter( + category="Heartbeat", + metrics_hash=metrics_hash, + date=date.today() + ).exists()) + + # Different category should not affect + self.assertFalse(MetricSent.objects.filter( + category="Install", + metrics_hash=metrics_hash, + date=date.today() + ).exists()) + + @patch.object(models, "get_openwisp_version", return_value="23.0.0a") + @patch.object( + models, + "get_enabled_openwisp_modules", + return_value=_ENABLED_OPENWISP_MODULES_RETURN_VALUE, + ) + @patch.object( + models, + "get_os_details", + return_value=_OS_DETAILS_RETURN_VALUE, + ) + @patch("openwisp_utils.metric_collection.models.post_metrics") + @freeze_time("2023-12-01 00:00:00") + def test_send_usage_metrics_prevents_duplicate_sends_same_day(self, mocked_post, *args): + # First send should work + tasks.send_usage_metrics.delay(category="Heartbeat") + mocked_post.assert_called_once() + + # Reset mock + mocked_post.reset_mock() + + # Second send on same day should be skipped + tasks.send_usage_metrics.delay(category="Heartbeat") + mocked_post.assert_not_called() + + @patch.object(models, "get_openwisp_version", return_value="23.0.0a") + @patch.object( + models, + "get_enabled_openwisp_modules", + return_value=_ENABLED_OPENWISP_MODULES_RETURN_VALUE, + ) + @patch.object( + models, + "get_os_details", + return_value=_OS_DETAILS_RETURN_VALUE, + ) + @patch("openwisp_utils.metric_collection.models.post_metrics") + @freeze_time("2023-12-01 00:00:00") + def test_send_usage_metrics_allows_send_next_day(self, mocked_post, *args): + # Send on day 1 + tasks.send_usage_metrics.delay(category="Heartbeat") + mocked_post.assert_called_once() + + # Reset mock + mocked_post.reset_mock() + + # Move to next day + with freeze_time("2023-12-02 00:00:00"): + tasks.send_usage_metrics.delay(category="Heartbeat") + mocked_post.assert_called_once() # Should be called again