diff --git a/Makefile b/Makefile index 1f5564166b8..b720e1b1619 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,9 @@ importer-tests: recoverer-tests: cd gcp/workers/recoverer && ./run_tests.sh +vanir-signatures-tests: + cd gcp/workers/vanir_signatures && ./run_tests.sh + website-tests: cd gcp/website && ./run_tests.sh diff --git a/deployment/build-and-stage.yaml b/deployment/build-and-stage.yaml index e0c8792efdf..532ad6bf320 100644 --- a/deployment/build-and-stage.yaml +++ b/deployment/build-and-stage.yaml @@ -216,6 +216,16 @@ steps: args: ['push', '--all-tags', 'gcr.io/oss-vdb-test/osv-linter'] waitFor: ['build-osv-linter', 'cloud-build-queue'] +# Build/push vanir-signatures images to gcr.io/oss-vdb. +- name: gcr.io/cloud-builders/docker + args: ['build', '-t', 'gcr.io/oss-vdb/vanir-signatures:latest', '-t', 'gcr.io/oss-vdb/vanir-signatures:$COMMIT_SHA', '.'] + dir: 'gcp/workers/vanir_signatures' + id: 'build-vanir-signatures' + waitFor: ['build-worker'] +- name: gcr.io/cloud-builders/docker + args: ['push', '--all-tags', 'gcr.io/oss-vdb/vanir-signatures'] + waitFor: ['build-vanir-signatures', 'cloud-build-queue'] + # Build/push cron job images. - name: gcr.io/cloud-builders/docker args: ['build', '-t', 'gcr.io/oss-vdb/cron:latest', '-t', 'gcr.io/oss-vdb/cron:$COMMIT_SHA', '.'] @@ -432,6 +442,7 @@ steps: relations=gcr.io/oss-vdb/relations:$COMMIT_SHA,\ generatesitemap=gcr.io/oss-vdb/generatesitemap:$COMMIT_SHA,\ gitter=gcr.io/oss-vdb/gitter:$COMMIT_SHA,\ + vanir-signatures=gcr.io/oss-vdb/vanir-signatures:$COMMIT_SHA,\ cron=gcr.io/oss-vdb/cron:$COMMIT_SHA" ] dir: deployment/clouddeploy/gke-workers @@ -496,3 +507,4 @@ images: - 'gcr.io/oss-vdb/oss-fuzz-importer:$COMMIT_SHA' - 'gcr.io/oss-vdb/generatesitemap:$COMMIT_SHA' - 'gcr.io/oss-vdb/gitter:$COMMIT_SHA' +- 'gcr.io/oss-vdb/vanir-signatures:$COMMIT_SHA' diff --git a/deployment/clouddeploy/gke-workers/base/kustomization.yaml b/deployment/clouddeploy/gke-workers/base/kustomization.yaml index 4125bd5a47c..4d21838b682 100644 --- a/deployment/clouddeploy/gke-workers/base/kustomization.yaml +++ b/deployment/clouddeploy/gke-workers/base/kustomization.yaml @@ -30,4 +30,5 @@ resources: - record-checker.yaml - cve5-to-osv.yaml - custommetrics.yaml +- vanir-signatures.yaml diff --git a/deployment/clouddeploy/gke-workers/base/vanir-signatures.yaml b/deployment/clouddeploy/gke-workers/base/vanir-signatures.yaml new file mode 100644 index 00000000000..a030d4a94e7 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/base/vanir-signatures.yaml @@ -0,0 +1,26 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vanir-signatures + labels: + cronLastSuccessfulTimeMins: "1500" +spec: + schedule: "0 9 * * *" + timeZone: "Australia/Sydney" + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + spec: + containers: + - name: vanir-signatures + image: vanir-signatures + imagePullPolicy: Always + resources: + requests: + cpu: "1" + memory: "10G" + limits: + cpu: "1" + memory: "13G" + restartPolicy: Never diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/kustomization.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/kustomization.yaml index 601085fd537..91165997cf7 100644 --- a/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/kustomization.yaml +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/kustomization.yaml @@ -26,3 +26,4 @@ patches: - path: record-checker.yaml - path: custommetrics.yaml - path: gitter.yaml +- path: vanir-signatures.yaml diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/vanir-signatures.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/vanir-signatures.yaml new file mode 100644 index 00000000000..cae25cded4a --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/vanir-signatures.yaml @@ -0,0 +1,16 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vanir-signatures +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: vanir-signatures + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + - name: OSV_VULNERABILITIES_BUCKET + value: osv-test-vulnerabilities diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb/kustomization.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb/kustomization.yaml index f04532deffc..c95f9ffc101 100644 --- a/deployment/clouddeploy/gke-workers/environments/oss-vdb/kustomization.yaml +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb/kustomization.yaml @@ -23,4 +23,5 @@ patches: - path: cve5-to-osv.yaml - path: custommetrics.yaml - path: gitter.yaml +- path: vanir-signatures.yaml diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb/vanir-signatures.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb/vanir-signatures.yaml new file mode 100644 index 00000000000..3bbd48cc966 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb/vanir-signatures.yaml @@ -0,0 +1,16 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vanir-signatures +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: vanir-signatures + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb + - name: OSV_VULNERABILITIES_BUCKET + value: osv-vulnerabilities diff --git a/gcp/workers/cloudbuild.yaml b/gcp/workers/cloudbuild.yaml index 51390bb3038..242fd8e79d2 100644 --- a/gcp/workers/cloudbuild.yaml +++ b/gcp/workers/cloudbuild.yaml @@ -62,6 +62,14 @@ steps: - GITTER_PORT=8891 waitFor: ['init', 'sync'] +- name: 'gcr.io/oss-vdb/ci' + id: 'vanir-signatures-tests' + dir: gcp/workers/vanir_signatures + args: ['bash', '-ex', 'run_tests.sh'] + env: + - DATASTORE_EMULATOR_PORT=8006 + waitFor: ['init', 'sync'] + timeout: 7200s options: machineType: E2_HIGHCPU_8 diff --git a/gcp/workers/oss_fuzz_worker/worker.py b/gcp/workers/oss_fuzz_worker/worker.py index 9187b6b6c51..433b01dd8bd 100644 --- a/gcp/workers/oss_fuzz_worker/worker.py +++ b/gcp/workers/oss_fuzz_worker/worker.py @@ -32,7 +32,6 @@ from google.cloud import pubsub_v1 from google.cloud import storage from google.cloud.storage import retry -from google.protobuf import json_format sys.path.append(os.path.dirname(os.path.realpath(__file__))) import osv @@ -42,8 +41,6 @@ from osv import vulnerability_pb2 import oss_fuzz -from vanir import vulnerability_manager - DEFAULT_WORK_DIR = '/work' OSS_FUZZ_GIT_URL = 'https://github.com/google/oss-fuzz.git' TASK_SUBSCRIPTION = 'oss-fuzz-tasks' @@ -499,43 +496,6 @@ def _analyze_vulnerability(self, source_repo, repo, vulnerability, path, vulnerability.id) raise UpdateConflictError - def _generate_vanir_signatures(self, vulnerability): - """Generates Vanir signatures for a vulnerability.""" - if not any(r.type == vulnerability_pb2.Range.GIT - for affected in vulnerability.affected - for r in affected.ranges): - logging.info( - 'Skipping Vanir signature generation for %s as it has no ' - 'GIT affected ranges.', vulnerability.id) - return vulnerability - if any(affected.package.name == "Kernel" and - affected.package.ecosystem == "Linux" - for affected in vulnerability.affected): - logging.info( - 'Skipping Vanir signature generation for %s as it is a ' - 'Kernel vulnerability.', vulnerability.id) - return vulnerability - - logging.info('Generating Vanir signatures for %s', vulnerability.id) - try: - vuln_manager = vulnerability_manager.generate_from_json_string( - content=json.dumps([ - json_format.MessageToDict( - vulnerability, preserving_proto_field_name=True) - ]),) - vuln_manager.generate_signatures() - - if not vuln_manager.vulnerabilities: - logging.warning('Vanir signature generation resulted in no ' - 'vulnerabilities.') - return vulnerability - - return vuln_manager.vulnerabilities[0].to_proto() - except Exception: - logging.exception('Failed to generate Vanir signatures for %s', - vulnerability.id) - return vulnerability - def _do_update(self, source_repo, repo, vulnerability, relative_path, original_sha256): """Process updates on a vulnerability.""" @@ -552,7 +512,6 @@ def _do_update(self, source_repo, repo, vulnerability, relative_path, orig_modified_date = vulnerability.modified.ToDatetime(datetime.UTC) # Fully enrich the vulnerability object in memory. - vulnerability = self._generate_vanir_signatures(vulnerability) try: result = self._analyze_vulnerability(source_repo, repo, vulnerability, relative_path, original_sha256) diff --git a/gcp/workers/vanir_signatures/Dockerfile b/gcp/workers/vanir_signatures/Dockerfile new file mode 100644 index 00000000000..ea1a363acd9 --- /dev/null +++ b/gcp/workers/vanir_signatures/Dockerfile @@ -0,0 +1,19 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM gcr.io/oss-vdb/worker + +COPY vanir_signatures.py /usr/local/bin/vanir_signatures.py +RUN chmod 755 /usr/local/bin/vanir_signatures.py +ENTRYPOINT ["vanir_signatures.py"] diff --git a/gcp/workers/vanir_signatures/run_tests.sh b/gcp/workers/vanir_signatures/run_tests.sh new file mode 100755 index 00000000000..8031da4464a --- /dev/null +++ b/gcp/workers/vanir_signatures/run_tests.sh @@ -0,0 +1,22 @@ +#!/bin/bash -ex +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cd ../worker + +# Install dependencies only if not running in Cloud Build +if [ -z "$CLOUDBUILD" ]; then + poetry sync +fi +poetry run python ../vanir_signatures/vanir_signatures_test.py diff --git a/gcp/workers/vanir_signatures/vanir_signatures.py b/gcp/workers/vanir_signatures/vanir_signatures.py new file mode 100644 index 00000000000..8cbdb1820d4 --- /dev/null +++ b/gcp/workers/vanir_signatures/vanir_signatures.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Cron job to generate Vanir signatures for modified vulnerabilities.""" + +import argparse +import datetime +import json +import logging +from concurrent import futures + +from google.cloud import ndb +from google.protobuf import json_format + +import osv +import osv.logs +import osv.models +import osv.gcs +from osv import vulnerability_pb2 + +from vanir import vulnerability_manager + +JOB_NAME = 'vanir_signatures' +JOB_DATA_LAST_RUN = 'vanir_signatures_last_run' + + +def _generate_vanir_signatures_batch( + vulnerabilities: list[vulnerability_pb2.Vulnerability] +) -> dict[str, list[vulnerability_pb2.Vulnerability]]: + """Generates Vanir signatures for a batch of vulnerabilities.""" + if not vulnerabilities: + return {} + + logging.info('Generating Vanir signatures for batch of %d', + len(vulnerabilities)) + + try: + vuln_dicts = [ + json_format.MessageToDict(v, preserving_proto_field_name=True) + for v in vulnerabilities + ] + vuln_manager = vulnerability_manager.generate_from_json_string( + content=json.dumps(vuln_dicts)) + vuln_manager.generate_signatures() + + if not vuln_manager.vulnerabilities: + logging.warning('Vanir signature generation resulted in no ' + 'vulnerabilities.') + return {v.id: [v] for v in vulnerabilities} + + results = {} + for v in vuln_manager.vulnerabilities: + proto = v.to_proto() + if proto.id not in results: + results[proto.id] = [] + results[proto.id].append(proto) + + # Ensure all input IDs are in results, even if they weren't enriched + for v in vulnerabilities: + if v.id not in results: + results[v.id] = [v] + + return results + + except Exception: + logging.exception('Failed to generate Vanir signatures for batch of %d', + len(vulnerabilities)) + return {v.id: [v] for v in vulnerabilities} + + +def affected_is_kernel(affected: vulnerability_pb2.Affected) -> bool: + """Returns True if the affected package is a Linux kernel.""" + if (affected.package.name == 'Kernel' and + affected.package.ecosystem == 'Linux'): + return True + + if any('git.kernel.org/pub/scm/linux/kernel/git' in ar.repo + for ar in affected.ranges): + return True + + return False + + +def process_batch(vuln_ids: list[str], + dry_run: bool = False, + max_workers: int = 20) -> int: + """Process a batch of vulnerabilities.""" + if not vuln_ids: + return 0 + + logging.info('Processing batch of %d vulnerabilities', len(vuln_ids)) + + # Parallel fetch OSV records from GCS + vulnerabilities_to_process = [] + gcs_generations = {} + + with futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + + results = list(executor.map(osv.gcs.get_by_id_with_generation, vuln_ids)) + + for i, res in enumerate(results): + vuln_id = vuln_ids[i] + if not res: + logging.warning('Vulnerability %s not found in GCS', vuln_id) + continue + + vulnerability, gcs_gen = res + + # Filter + if not any(r.type == vulnerability_pb2.Range.GIT + for affected in vulnerability.affected + for r in affected.ranges): + logging.debug('Skipping %s: no GIT affected ranges.', vuln_id) + continue + + if any( + affected_is_kernel(affected) for affected in vulnerability.affected): + logging.debug('Skipping %s: it is a Kernel vulnerability', vuln_id) + continue + + if vulnerability.HasField('withdrawn'): + logging.debug('Skipping %s: it is withdrawn', vuln_id) + continue + + vulnerabilities_to_process.append(vulnerability) + gcs_generations[vulnerability.id] = gcs_gen + + if not vulnerabilities_to_process: + return 0 + + # Batch signature generation + batch_results = _generate_vanir_signatures_batch(vulnerabilities_to_process) + + # Collect all enriched vulnerabilities + all_enriched = [] + for original_vuln in vulnerabilities_to_process: + enriched = batch_results.get(original_vuln.id, [original_vuln]) + for v in enriched: + if v != original_vuln: + all_enriched.append(v) + + if not all_enriched: + return 0 + + if dry_run: + for enriched_vuln in all_enriched: + logging.info('Dry run: would have updated %s', enriched_vuln.id) + return len(all_enriched) + + # Update Datastore and GCS + updated_count = 0 + + # Batch fetch Vulnerabilities from Datastore + vuln_keys = [ndb.Key(osv.models.Vulnerability, v.id) for v in all_enriched] + vulns = ndb.get_multi(vuln_keys) + + for v, vuln in zip(all_enriched, vulns): + if not vuln: + logging.error('Vulnerability %s not found in Datastore', v.id) + continue + + now = osv.utcnow() + v.modified.FromDatetime(now) + vuln.modified = now + + # Update Vulnerability entity in Datastore + vuln.put() + + # Use gcs_generations[original_id] ONLY if it matches enriched ID. + gen = gcs_generations.get(v.id) + try: + osv.gcs.upload_vulnerability(v, gen) + updated_count += 1 + except Exception: + logging.exception('Failed to upload vulnerability %s to GCS', v.id) + + return updated_count + + +def main(): + """Main entry point for the cron job.""" + parser = argparse.ArgumentParser(description='Vanir signatures cron job.') + parser.add_argument( + '--batch-size', + type=int, + default=500, + help='Number of vulnerabilities to process in each batch.') + parser.add_argument( + '--max-workers', + type=int, + default=20, + help='Maximum number of parallel workers.') + parser.add_argument( + '--dry-run', action='store_true', help='Perform a dry run.') + parser.add_argument( + '--hours', + type=int, + help='Number of hours back to process modified records.') + args = parser.parse_args() + + if args.dry_run: + logging.getLogger().setLevel(logging.DEBUG) + + last_run_key = ndb.Key(osv.models.JobData, JOB_DATA_LAST_RUN) + last_run_data = last_run_key.get() + + # Capture current time to use as last_run for the next time. + current_run = osv.utcnow() + + if args.hours: + last_run = current_run - datetime.timedelta(hours=args.hours) + logging.info( + 'Running Vanir signature generation for the last %d hours (since %s)', + args.hours, last_run) + else: + last_run = last_run_data.value if last_run_data else None + if last_run: + logging.info('Running Vanir signature generation since last run: %s', + last_run) + else: + logging.info('No last run found, querying all vulnerabilities.') + + # Single global query for modified vulnerabilities + query = osv.models.Vulnerability.query() + if last_run: + query = query.filter(osv.models.Vulnerability.modified > last_run) + + total_generated_count = 0 + total_processed_count = 0 + + with futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor: + + def process_with_context(batch): + with ndb.Client().context(): + return process_batch(batch, args.dry_run, args.max_workers) + + future_to_batch = {} + current_batch = [] + + logging.info('Streaming vulnerabilities for processing.') + for key in query.iter(keys_only=True): + current_batch.append(key.id()) + if len(current_batch) >= args.batch_size: + f = executor.submit(process_with_context, current_batch) + future_to_batch[f] = current_batch + current_batch = [] + + if current_batch: + f = executor.submit(process_with_context, current_batch) + future_to_batch[f] = current_batch + + if not future_to_batch: + logging.info('No modified vulnerabilities found.') + else: + logging.info('Processing %d batches of vulnerabilities.', + len(future_to_batch)) + for future in futures.as_completed(future_to_batch): + try: + generated = future.result() + total_generated_count += generated + total_processed_count += len(future_to_batch[future]) + except Exception: + logging.exception('Failed to process a batch of vulnerabilities') + + logging.info('Processed %d vulnerabilities, generated %d new signatures.', + total_processed_count, total_generated_count) + + if args.dry_run: + logging.info('Dry run: would have updated last_run to %s', current_run) + return + + # Update last_run timestamp + if not last_run_data: + last_run_data = osv.models.JobData(id=JOB_DATA_LAST_RUN) + + last_run_data.value = current_run + last_run_data.put() + + +if __name__ == '__main__': + _ndb_client = ndb.Client() + osv.logs.setup_gcp_logging(JOB_NAME) + with _ndb_client.context(): + main() diff --git a/gcp/workers/vanir_signatures/vanir_signatures_test.py b/gcp/workers/vanir_signatures/vanir_signatures_test.py new file mode 100644 index 00000000000..b95dd69ba4e --- /dev/null +++ b/gcp/workers/vanir_signatures/vanir_signatures_test.py @@ -0,0 +1,164 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for vanir_signatures.""" + +import unittest +from unittest import mock + +from google.cloud import ndb + +import osv +import osv.tests +import vanir_signatures +from osv import vulnerability_pb2 + + +class VanirSignaturesTest(unittest.TestCase): + """Tests for vanir_signatures.""" + + @classmethod + def setUpClass(cls): + cls.emulator = cls.enterClassContext(osv.tests.datastore_emulator()) + cls.enterClassContext(ndb.Client().context(cache_policy=False)) + + def setUp(self): + self.emulator.reset() + + @mock.patch('osv.gcs.get_by_id_with_generation') + def test_process_batch_skip_no_git_ranges(self, mock_get_gcs): + """Test skipping when no GIT ranges are present.""" + vuln_id = 'VULN-1' + vuln = vulnerability_pb2.Vulnerability(id=vuln_id) + vuln.affected.add() + + mock_get_gcs.return_value = (vuln, '123') + + with self.assertLogs(level='DEBUG') as cm: + result = vanir_signatures.process_batch([vuln_id]) + self.assertEqual(result, 0) + self.assertTrue(any('no GIT affected ranges' in log for log in cm.output)) + + @mock.patch('osv.gcs.get_by_id_with_generation') + def test_process_batch_skip_kernel(self, mock_get_gcs): + """Test skipping kernel vulnerabilities.""" + vuln_id = 'VULN-1' + vuln = vulnerability_pb2.Vulnerability(id=vuln_id) + affected = vuln.affected.add() + affected.package.name = 'Kernel' + affected.package.ecosystem = 'Linux' + affected.ranges.add( + type=vulnerability_pb2.Range.GIT, + repo='https://example.com/kernel-repo') + + mock_get_gcs.return_value = (vuln, '123') + + with self.assertLogs(level='DEBUG') as cm: + result = vanir_signatures.process_batch([vuln_id]) + self.assertEqual(result, 0) + self.assertTrue( + any('is a Kernel vulnerability' in log for log in cm.output)) + + @mock.patch('osv.gcs.get_by_id_with_generation') + def test_process_batch_skip_withdrawn(self, mock_get_gcs): + """Test skipping withdrawn vulnerabilities.""" + vuln_id = 'VULN-1' + vuln = vulnerability_pb2.Vulnerability(id=vuln_id) + vuln.withdrawn.FromSeconds(1234567890) + affected = vuln.affected.add() + affected.ranges.add( + type=vulnerability_pb2.Range.GIT, repo='https://example.com/repo') + + mock_get_gcs.return_value = (vuln, '123') + + with self.assertLogs(level='DEBUG') as cm: + result = vanir_signatures.process_batch([vuln_id]) + self.assertEqual(result, 0) + self.assertTrue(any('it is withdrawn' in log for log in cm.output)) + + @mock.patch('osv.gcs.get_by_id_with_generation') + @mock.patch('osv.gcs.upload_vulnerability') + @mock.patch('vanir_signatures._generate_vanir_signatures_batch') + def test_process_batch_success(self, mock_gen_signatures, mock_upload, + mock_get_gcs): + """Test successful signature generation.""" + vuln_id = 'VULN-1' + + # Input vulnerability + vuln = vulnerability_pb2.Vulnerability(id=vuln_id) + affected = vuln.affected.add() + affected.ranges.add( + type=vulnerability_pb2.Range.GIT, repo='https://example.com/repo') + + mock_get_gcs.return_value = (vuln, '123') + + # Mock generation result + enriched_vuln = vulnerability_pb2.Vulnerability() + enriched_vuln.CopyFrom(vuln) + enriched_vuln.affected[0].database_specific['vanir_signatures'] = [{ + 'id': 'sig1' + }] + mock_gen_signatures.return_value = {vuln_id: [enriched_vuln]} + + # Setup Datastore Vulnerability + vuln_entity = osv.Vulnerability(id=vuln_id) + vuln_entity.put() + + result = vanir_signatures.process_batch([vuln_id]) + + self.assertEqual(result, 1) + mock_upload.assert_called_once() + mock_gen_signatures.assert_called_once_with([vuln]) + + # Verify Datastore update + updated_vuln = osv.Vulnerability.get_by_id(vuln_id) + self.assertIsNotNone(updated_vuln.modified) + + @mock.patch('osv.models.Vulnerability.query') + @mock.patch('vanir_signatures.process_batch') + def test_global_batching(self, mock_process_batch, mock_vuln_query): + """Test performing global batching of all found vulnerabilities.""" + # Mock Vulnerability query with 150 items + vuln_keys = [mock.Mock() for _ in range(150)] + for i, k in enumerate(vuln_keys): + k.id.return_value = f'VULN-{i}' + + mock_query = mock.Mock() + mock_vuln_query.return_value = mock_query + mock_query.filter.return_value = mock_query + mock_query.iter.return_value = vuln_keys + + mock_process_batch.return_value = 10 + + # Run main with dry-run and batch_size=100 + with mock.patch( + 'argparse.ArgumentParser.parse_args', + return_value=mock.Mock( + dry_run=True, batch_size=100, max_workers=20, hours=None)): + vanir_signatures.main() + + # Verify process_batch was called for each chunk (BATCH_SIZE=100) + # 150 items -> 2 batches + self.assertEqual(mock_process_batch.call_count, 2) + + # First batch of 100 + expected_batch1 = [f'VULN-{i}' for i in range(100)] + mock_process_batch.assert_any_call(expected_batch1, True, 20) + + # Second batch of 50 + expected_batch2 = [f'VULN-{i}' for i in range(100, 150)] + mock_process_batch.assert_any_call(expected_batch2, True, 20) + + +if __name__ == '__main__': + unittest.main() diff --git a/gcp/workers/worker/testdata/RESTUpdateTest_update_no_introduced.txt b/gcp/workers/worker/testdata/RESTUpdateTest_update_no_introduced.txt index 8acf0e2cf80..fbce3c1ce52 100644 --- a/gcp/workers/worker/testdata/RESTUpdateTest_update_no_introduced.txt +++ b/gcp/workers/worker/testdata/RESTUpdateTest_update_no_introduced.txt @@ -1,24 +1,4 @@ -{ 'affected': [ { 'database_specific': { 'source': 'http://localhost:8000/CURL-CVE-2022-32221.json', - 'vanir_signatures': [ { 'deprecated': False, - 'digest': { 'function_hash': '22968065415160735040135778472335782425', - 'length': 58084.0}, - 'id': 'CURL-CVE-2022-32221-9751f04c', - 'signature_type': 'Function', - 'signature_version': 'v1', - 'source': 'https://github.com/curl/curl.git/commit/a64e3e59938abd7d667e4470a18072a24d7e9de9', - 'target': { 'file': 'lib/setopt.c', - 'function': 'Curl_vsetopt'}}, - { 'deprecated': False, - 'digest': { 'line_hashes': [ '73596727404438881622769716353410783065', - '150108665408450698810391826671290668314', - '264542534956227828232279400943172691231', - '248438938282829223471764231064667949049'], - 'threshold': 0.9}, - 'id': 'CURL-CVE-2022-32221-b7951194', - 'signature_type': 'Line', - 'signature_version': 'v1', - 'source': 'https://github.com/curl/curl.git/commit/a64e3e59938abd7d667e4470a18072a24d7e9de9', - 'target': { 'file': 'lib/setopt.c'}}]}, +{ 'affected': [ { 'database_specific': { 'source': 'http://localhost:8000/CURL-CVE-2022-32221.json'}, 'ranges': [ { 'events': [ {'introduced': '7.7'}, {'fixed': '7.86.0'}], 'type': 'SEMVER'}, diff --git a/gcp/workers/worker/testdata/UpdateTest_last_affected_git.txt b/gcp/workers/worker/testdata/UpdateTest_last_affected_git.txt index 15c1c4cc099..b466919a17a 100644 --- a/gcp/workers/worker/testdata/UpdateTest_last_affected_git.txt +++ b/gcp/workers/worker/testdata/UpdateTest_last_affected_git.txt @@ -3,11 +3,11 @@ { 'introduced': 'febfac1940086bc1f6d3dc33fda0a1d1ba336209'}], 'repo': 'https://osv-test/repo/url', 'type': 'GIT'}], - 'versions': ['branch-v0.1.1', - 'branch-v0.1.1-with-fix', - 'branch_1_cherrypick_regress', - 'v0.1.1', - 'v0.2']}], + 'versions': [ 'branch-v0.1.1', + 'branch-v0.1.1-with-fix', + 'branch_1_cherrypick_regress', + 'v0.1.1', + 'v0.2']}], 'details': 'Blah blah blah\nBlah\n', 'id': 'OSV-TEST-last-affected-01', 'modified': '3000-01-01T00:00:00Z', diff --git a/gcp/workers/worker/testdata/UpdateTest_normalized_pypi_pubsub_calls.txt b/gcp/workers/worker/testdata/UpdateTest_normalized_pypi_pubsub_calls.txt index 109622dc0d6..b4c1a4acf69 100644 --- a/gcp/workers/worker/testdata/UpdateTest_normalized_pypi_pubsub_calls.txt +++ b/gcp/workers/worker/testdata/UpdateTest_normalized_pypi_pubsub_calls.txt @@ -1 +1 @@ -[call('projects/test-osv/topics/pypi-bridge', data=b'{"id": "PYSEC-456", "summary": "A vulnerability in an unnormalized package", "details": "Blah blah blah\\nBlah\\n", "modified": "3000-01-01T00:00:00Z", "published": "3000-01-01T00:00:00Z", "references": [{"type": "WEB", "url": "https://ref.com/ref"}], "affected": [{"package": {"name": "scrapy", "ecosystem": "PyPI", "purl": "pkg:pypi/scrapy"}, "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "1.14.2"}, {"fixed": "1.31.0"}]}, {"type": "GIT", "repo": "https://osv-test/repo/url", "events": [{"introduced": "eefe8ec3f1f90d0e684890e810f3f21e8500a4cd"}, {"fixed": "8d8242f545e9cec3e6d0d2e3f5bde8be1c659735"}]}], "versions": []}], "schema_version": "SCHEMA_VERSION"}')] \ No newline at end of file +[ call('projects/test-osv/topics/pypi-bridge', data=b'{"id": "PYSEC-456", "summary": "A vulnerability in an unnormalized package", "details": "Blah blah blah\\nBlah\\n", "modified": "3000-01-01T00:00:00Z", "published": "3000-01-01T00:00:00Z", "references": [{"type": "WEB", "url": "https://ref.com/ref"}], "affected": [{"package": {"name": "scrapy", "ecosystem": "PyPI", "purl": "pkg:pypi/scrapy"}, "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "1.14.2"}, {"fixed": "1.31.0"}]}, {"type": "GIT", "repo": "https://osv-test/repo/url", "events": [{"introduced": "eefe8ec3f1f90d0e684890e810f3f21e8500a4cd"}, {"fixed": "8d8242f545e9cec3e6d0d2e3f5bde8be1c659735"}]}], "versions": []}], "schema_version": "SCHEMA_VERSION"}')] \ No newline at end of file diff --git a/gcp/workers/worker/testdata/UpdateTest_pypi_pubsub_calls.txt b/gcp/workers/worker/testdata/UpdateTest_pypi_pubsub_calls.txt index 51f4eaac423..4737cbad84b 100644 --- a/gcp/workers/worker/testdata/UpdateTest_pypi_pubsub_calls.txt +++ b/gcp/workers/worker/testdata/UpdateTest_pypi_pubsub_calls.txt @@ -1 +1 @@ -[call('projects/test-osv/topics/pypi-bridge', data=b'{"id": "PYSEC-123", "summary": "A vulnerability", "details": "Blah blah blah\\nBlah\\n", "modified": "3000-01-01T00:00:00Z", "published": "3000-01-01T00:00:00Z", "references": [{"type": "WEB", "url": "https://ref.com/ref"}], "affected": [{"package": {"name": "grpcio", "ecosystem": "PyPI", "purl": "pkg:pypi/grpcio"}, "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "1.14.2"}, {"fixed": "1.31.0"}]}, {"type": "GIT", "repo": "https://osv-test/repo/url", "events": [{"introduced": "eefe8ec3f1f90d0e684890e810f3f21e8500a4cd"}, {"fixed": "8d8242f545e9cec3e6d0d2e3f5bde8be1c659735"}]}], "versions": ["1.14.2", "1.15.0", "1.15.0rc1", "1.16.0", "1.16.0rc1", "1.16.1", "1.17.0", "1.17.1", "1.18.0", "1.19.0", "1.20.0", "1.20.0rc1", "1.20.0rc2", "1.20.0rc3", "1.20.1", "1.21.0rc1", "1.21.1", "1.21.1rc1", "1.22.0", "1.22.0rc1", "1.22.1", "1.23.0", "1.23.0rc1", "1.23.1", "1.24.0", "1.24.0rc1", "1.24.1", "1.24.3", "1.25.0", "1.25.0rc1", "1.26.0", "1.26.0rc1", "1.27.0rc1", "1.27.0rc2", "1.27.1", "1.27.2", "1.28.0rc1", "1.28.0rc2", "1.28.1", "1.29.0", "1.30.0"]}], "schema_version": "SCHEMA_VERSION"}')] \ No newline at end of file +[ call('projects/test-osv/topics/pypi-bridge', data=b'{"id": "PYSEC-123", "summary": "A vulnerability", "details": "Blah blah blah\\nBlah\\n", "modified": "3000-01-01T00:00:00Z", "published": "3000-01-01T00:00:00Z", "references": [{"type": "WEB", "url": "https://ref.com/ref"}], "affected": [{"package": {"name": "grpcio", "ecosystem": "PyPI", "purl": "pkg:pypi/grpcio"}, "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "1.14.2"}, {"fixed": "1.31.0"}]}, {"type": "GIT", "repo": "https://osv-test/repo/url", "events": [{"introduced": "eefe8ec3f1f90d0e684890e810f3f21e8500a4cd"}, {"fixed": "8d8242f545e9cec3e6d0d2e3f5bde8be1c659735"}]}], "versions": ["1.14.2", "1.15.0", "1.15.0rc1", "1.16.0", "1.16.0rc1", "1.16.1", "1.17.0", "1.17.1", "1.18.0", "1.19.0", "1.20.0", "1.20.0rc1", "1.20.0rc2", "1.20.0rc3", "1.20.1", "1.21.0rc1", "1.21.1", "1.21.1rc1", "1.22.0", "1.22.0rc1", "1.22.1", "1.23.0", "1.23.0rc1", "1.23.1", "1.24.0", "1.24.0rc1", "1.24.1", "1.24.3", "1.25.0", "1.25.0rc1", "1.26.0", "1.26.0rc1", "1.27.0rc1", "1.27.0rc2", "1.27.1", "1.27.2", "1.28.0rc1", "1.28.0rc2", "1.28.1", "1.29.0", "1.30.0"]}], "schema_version": "SCHEMA_VERSION"}')] \ No newline at end of file diff --git a/gcp/workers/worker/testdata/UpdateTest_update_bucket_cve.txt b/gcp/workers/worker/testdata/UpdateTest_update_bucket_cve.txt index cdded136948..20f0c000b48 100644 --- a/gcp/workers/worker/testdata/UpdateTest_update_bucket_cve.txt +++ b/gcp/workers/worker/testdata/UpdateTest_update_bucket_cve.txt @@ -1,71 +1,4 @@ -{ 'affected': [ { 'database_specific': { 'vanir_signatures': [ { 'deprecated': False, - 'digest': { 'line_hashes': [ '18066036635502801806677364178756254862', - '88369412895184753394283011451803187548', - '50848458948504730426650075084385046530', - '91284993680127737564993618090545145416', - '30779278950355321333621475605602830830', - '122421578121241373365155348152646941523', - '267652210589392654099845994262755826062', - '334808111126213430220547654602188383660', - '234389204524678077984531197469034242690', - '152880517379272209571165325006789878786', - '299871312446227378724863519270618301341', - '157634544376100154879962283397081738110', - '103663099829328578689797223848801574827', - '158563421165358858389893196995983570762', - '315965584007238676040631750953088200664'], - 'threshold': 0.9}, - 'id': 'CVE-2016-15011-929806e0', - 'signature_type': 'Line', - 'signature_version': 'v1', - 'source': 'https://github.com/e-contract/dssp/commit/ec4238349691ec66dd30b416ec6eaab02d722302', - 'target': { 'file': 'dssp-client/src/main/java/be/e_contract/dssp/client/metadata/DigitalSignatureServiceMetadata.java'}}, - { 'deprecated': False, - 'digest': { 'line_hashes': [ '6674387965125354881111149989428882853', - '100741820504985357262218153349452233434', - '253366101641995550384755812786879052342', - '245037096886845520996519599411616661529', - '158852189579109359359946013476030639584', - '298614597347537877121532413760030558894', - '180181956682520524395173299138562004562', - '146502839243717526526406585366671557144', - '244996413515733361838850122849344525825', - '166554563875570093109470347687697544350', - '9427977046515615106319032886256396870', - '279044285883194738631442483325879094037', - '295198785562376785392477306514392217432', - '44714085523243422643465698936438435501', - '267608316591780380179772018605253867646', - '182074437577114148436758739432546664545', - '87621961972550109442760282702331746920', - '64897152403082006856773989396486955494', - '184138636401118235309885205539354874180', - '62633257528035095954429323509732904426'], - 'threshold': 0.9}, - 'id': 'CVE-2016-15011-bd561b7b', - 'signature_type': 'Line', - 'signature_version': 'v1', - 'source': 'https://github.com/e-contract/dssp/commit/ec4238349691ec66dd30b416ec6eaab02d722302', - 'target': { 'file': 'dssp-client/src/main/java/be/e_contract/dssp/client/SignResponseVerifier.java'}}, - { 'deprecated': False, - 'digest': { 'function_hash': '259495117689681377355427521574538727644', - 'length': 1591.0}, - 'id': 'CVE-2016-15011-d557e328', - 'signature_type': 'Function', - 'signature_version': 'v1', - 'source': 'https://github.com/e-contract/dssp/commit/ec4238349691ec66dd30b416ec6eaab02d722302', - 'target': { 'file': 'dssp-client/src/main/java/be/e_contract/dssp/client/metadata/DigitalSignatureServiceMetadata.java', - 'function': 'DigitalSignatureServiceMetadata'}}, - { 'deprecated': False, - 'digest': { 'function_hash': '249451297539985081987952306682300702892', - 'length': 4302.0}, - 'id': 'CVE-2016-15011-fcf07dd1', - 'signature_type': 'Function', - 'signature_version': 'v1', - 'source': 'https://github.com/e-contract/dssp/commit/ec4238349691ec66dd30b416ec6eaab02d722302', - 'target': { 'file': 'dssp-client/src/main/java/be/e_contract/dssp/client/SignResponseVerifier.java', - 'function': 'checkSignResponse'}}]}, - 'ranges': [ { 'events': [ {'introduced': '0'}, +{ 'affected': [ { 'ranges': [ { 'events': [ {'introduced': '0'}, { 'fixed': '001ef99b0c8194468de960d007e2d82dcebc3bca'}, { 'fixed': 'ec4238349691ec66dd30b416ec6eaab02d722302'}], 'repo': 'https://github.com/e-contract/dssp', diff --git a/gcp/workers/worker/worker.py b/gcp/workers/worker/worker.py index 0c29859e14b..a64a6fe5b2f 100644 --- a/gcp/workers/worker/worker.py +++ b/gcp/workers/worker/worker.py @@ -33,7 +33,7 @@ from google.cloud import pubsub_v1 from google.cloud import storage from google.cloud.storage import retry -from google.protobuf import json_format, timestamp_pb2 +from google.protobuf import timestamp_pb2 sys.path.append(os.path.dirname(os.path.realpath(__file__))) import osv @@ -44,8 +44,6 @@ from osv import vulnerability_pb2, purl_helpers import oss_fuzz -from vanir import vulnerability_manager - DEFAULT_WORK_DIR = '/work' OSS_FUZZ_GIT_URL = 'https://github.com/google/oss-fuzz.git' TASK_SUBSCRIPTION = 'tasks' @@ -500,43 +498,6 @@ def _analyze_vulnerability(self, source_repo: osv.SourceRepository, vulnerability.id) raise UpdateConflictError - def _generate_vanir_signatures( - self, vulnerability: vulnerability_pb2.Vulnerability - ) -> vulnerability_pb2.Vulnerability: - """Generates Vanir signatures for a vulnerability.""" - if not any(r.type == vulnerability_pb2.Range.GIT - for affected in vulnerability.affected - for r in affected.ranges): - logging.info( - 'Skipping Vanir signature generation for %s as it has no ' - 'GIT affected ranges.', vulnerability.id) - return vulnerability - if any(affected_is_kernel(affected) for affected in vulnerability.affected): - logging.info( - 'Skipping Vanir signature generation for %s as it is a ' - 'Kernel vulnerability.', vulnerability.id) - return vulnerability - - logging.info('Generating Vanir signatures for %s', vulnerability.id) - try: - vuln_manager = vulnerability_manager.generate_from_json_string( - content=json.dumps([ - json_format.MessageToDict( - vulnerability, preserving_proto_field_name=True) - ]),) - vuln_manager.generate_signatures() - - if not vuln_manager.vulnerabilities: - logging.warning('Vanir signature generation resulted in no ' - 'vulnerabilities.') - return vulnerability - - return vuln_manager.vulnerabilities[0].to_proto() - except Exception: - logging.exception('Failed to generate Vanir signatures for %s', - vulnerability.id) - return vulnerability - def _do_update(self, source_repo: osv.SourceRepository, repo: pygit2.Repository | None, vulnerability: vulnerability_pb2.Vulnerability, @@ -552,7 +513,6 @@ def _do_update(self, source_repo: osv.SourceRepository, orig_modified_date = vulnerability.modified.ToDatetime(datetime.UTC) # Fully enrich the vulnerability object in memory. - vulnerability = self._generate_vanir_signatures(vulnerability) if any(affected_is_kernel(affected) for affected in vulnerability.affected): result = None logging.info( diff --git a/osv/gcs.py b/osv/gcs.py index 302d4e3272e..f8dfa98e228 100644 --- a/osv/gcs.py +++ b/osv/gcs.py @@ -18,6 +18,7 @@ from google.cloud import exceptions from google.cloud import storage +from requests.adapters import HTTPAdapter from .vulnerability_pb2 import Vulnerability diff --git a/osv/models.py b/osv/models.py index 263c0697598..067c1e40d80 100644 --- a/osv/models.py +++ b/osv/models.py @@ -947,7 +947,7 @@ class Vulnerability(ndb.Model): """A Vulnerability entry. Contains a minimal amount of information of an OSV record, including the - overall modified date, an some raw fields that are overwritten by our + overall modified date, an some raw fields that are overwritten by our enrichment. The entity's key/id is ID in OSV. @@ -973,6 +973,14 @@ class Vulnerability(ndb.Model): upstream_raw: list[str] = ndb.StringProperty(repeated=True) +# --- JobData --- + + +class JobData(ndb.Model): + """Job data.""" + value = ndb.GenericProperty() + + # --- Affected versions for matching ---