From 36902010050defff9ec4409831bf87ef85074425 Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:39:46 +0100 Subject: [PATCH 01/11] fix: wip-creating export list for incentive --- .../export_email_addresses_for_incentives.sql | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 scripts/sql/export_email_addresses_for_incentives.sql diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql new file mode 100644 index 00000000..6f95ae7a --- /dev/null +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -0,0 +1,84 @@ +-- This script: +-- 1 - creates a list of email addresses of participants eligible for incentives +-- 2 - updates participants with incentives so that they are timestamped as having received an incentive +-- 3 Incentive eligibility is based on: +-- - having submitted the questionnaire +-- - not having already received an incentive +-- - Submitted online questionnaire before telephone questionnaire conducted date + +-- Find container name: +podman ps --format "{{.Names}}" +-- Copy partner csv file to server temp directory +podman cp "/Users/stephhousden/Downloads/SampleDataPIDRemoved230126.csv" :/tmp/partner.csv + +podman cp "/Users/stephhousden/Downloads/SampleDataPIDRemoved230126.csv" lung_cancer_screening-db-1:/tmp/partner.csv + +-- login to psql on local container +psql -h localhost -p 5432 -U lung_cancer_screening -d lung_cancer_screening + + +-- Create table for partner data import + +-- single line command for psql: +CREATE TABLE incentive_partner_import_raw (nhs_number TEXT, date_of_birth TEXT, date_conducted TEXT, smoking_status TEXT, average_cigarettes_per_day_while_smoking TEXT, duration_smoked_years TEXT, years_since_quitting_smoking TEXT, height_measurement_type TEXT, height_measurement_value_metric_cm TEXT, weight_measurement_type TEXT, weight_measurement_value_metric_kg TEXT, previous_respiratory_diagnosis TEXT, personal_history_of_previous_cancer TEXT, family_history_of_lung_cancer TEXT, personal_history_of_asthma TEXT, asbestos_exposure_from_job_or_activity TEXT, education TEXT, ethnicity TEXT, plco_lung_cancer_risk_score TEXT, llp_lung_cancer_risk_score TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT now()); + +CREATE TABLE incentive_partner_import_raw ( + nhs_number TEXT, + date_of_birth TEXT, + date_conducted TEXT, + smoking_status TEXT, + average_cigarettes_per_day_while_smoking TEXT, + duration_smoked_years TEXT, + years_since_quitting_smoking TEXT, + height_measurement_type TEXT, + height_measurement_value_metric_cm TEXT, + weight_measurement_type TEXT, + weight_measurement_value_metric_kg TEXT, + previous_respiratory_diagnosis TEXT, + personal_history_of_previous_cancer TEXT, + family_history_of_lung_cancer TEXT, + personal_history_of_asthma TEXT, + asbestos_exposure_from_job_or_activity TEXT, + education TEXT, + ethnicity TEXT, + plco_lung_cancer_risk_score TEXT, + llp_lung_cancer_risk_score TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + + +SELECT * FROM incentive_partner_import_raw; + +-- psql command to import data from CSV file into the temp table + +-- One line command for psql: +\copy incentive_partner_import_raw (nhs_number, date_of_birth, date_conducted, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) FROM '/tmp/partner.csv' WITH (FORMAT csv, HEADER true); + +\copy incentive_partner_import_raw ( + nhs_number, + date_of_birth, + date_conducted, + smoking_status, + average_cigarettes_per_day_while_smoking, + duration_smoked_years, + years_since_quitting_smoking, + height_measurement_type, + height_measurement_value_metric_cm, + weight_measurement_type, + weight_measurement_value_metric_kg, + previous_respiratory_diagnosis, + personal_history_of_previous_cancer, + family_history_of_lung_cancer, + personal_history_of_asthma, + asbestos_exposure_from_job_or_activity, + education, + ethnicity, + plco_lung_cancer_risk_score, + llp_lung_cancer_risk_score +) FROM '/Users/stephhousden/Downloads/SampleDataPIDRemoved230126.csv' WITH (FORMAT csv, HEADER true); + + +-- Select distinct email addresses of eligible participants based on criteria outlined at the top of this file + +-- One line command for psql: +SELECT qu.email FROM questions_user qu JOIN questions_responseset qrs ON qrs.user_id = qu.id JOIN incentive_partner_import_raw ipir ON ipir.nhs_number = qu.nhs_number WHERE (NULLIF(ipir.date_conducted, '')::timestamptz > qrs.submitted_at); From 65af7fdd08d4408021485d896aa062eeacb29977 Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:43:18 +0100 Subject: [PATCH 02/11] fix: wip-creating export --- scripts/sql/export_email_addresses_for_incentives.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index 6f95ae7a..eca4e36f 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -9,9 +9,9 @@ -- Find container name: podman ps --format "{{.Names}}" -- Copy partner csv file to server temp directory -podman cp "/Users/stephhousden/Downloads/SampleDataPIDRemoved230126.csv" :/tmp/partner.csv +podman cp "/Users/*YourUsername*/Downloads/SampleDataPIDRemoved230126.csv" :/tmp/partner.csv -podman cp "/Users/stephhousden/Downloads/SampleDataPIDRemoved230126.csv" lung_cancer_screening-db-1:/tmp/partner.csv +podman cp "/Users/*YourUsername*/Downloads/SampleDataPIDRemoved230126.csv" lung_cancer_screening-db-1:/tmp/partner.csv -- login to psql on local container psql -h localhost -p 5432 -U lung_cancer_screening -d lung_cancer_screening @@ -75,10 +75,10 @@ SELECT * FROM incentive_partner_import_raw; ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score -) FROM '/Users/stephhousden/Downloads/SampleDataPIDRemoved230126.csv' WITH (FORMAT csv, HEADER true); +) FROM '/Users/*YourUsername*/Downloads/SampleDataPIDRemoved230126.csv' WITH (FORMAT csv, HEADER true); -- Select distinct email addresses of eligible participants based on criteria outlined at the top of this file -- One line command for psql: -SELECT qu.email FROM questions_user qu JOIN questions_responseset qrs ON qrs.user_id = qu.id JOIN incentive_partner_import_raw ipir ON ipir.nhs_number = qu.nhs_number WHERE (NULLIF(ipir.date_conducted, '')::timestamptz > qrs.submitted_at); +\copy (SELECT qu.email FROM questions_user qu JOIN questions_responseset qrs ON qrs.user_id = qu.id JOIN incentive_partner_import_raw ipir ON ipir.nhs_number = qu.nhs_number WHERE (NULLIF(ipir.date_conducted, '')::timestamptz > qrs.submitted_at)) TO 'C:/Users/*YourUsername*/Documents/eligible_participants_export.csv' WITH (FORMAT csv, HEADER true); From a8134c776becb289af0ea2a58afe941dfcc78599 Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:46:23 +0100 Subject: [PATCH 03/11] fix: removed test select query --- scripts/sql/export_email_addresses_for_incentives.sql | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index eca4e36f..4d4c3cd5 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -11,10 +11,7 @@ podman ps --format "{{.Names}}" -- Copy partner csv file to server temp directory podman cp "/Users/*YourUsername*/Downloads/SampleDataPIDRemoved230126.csv" :/tmp/partner.csv -podman cp "/Users/*YourUsername*/Downloads/SampleDataPIDRemoved230126.csv" lung_cancer_screening-db-1:/tmp/partner.csv - -- login to psql on local container -psql -h localhost -p 5432 -U lung_cancer_screening -d lung_cancer_screening -- Create table for partner data import @@ -46,9 +43,6 @@ CREATE TABLE incentive_partner_import_raw ( created_at TIMESTAMPTZ NOT NULL DEFAULT now() ); - -SELECT * FROM incentive_partner_import_raw; - -- psql command to import data from CSV file into the temp table -- One line command for psql: From aa5af0f9dbec29af68658906a7291ad39ad7da43 Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:35:45 +0100 Subject: [PATCH 04/11] fix: updated scripts to check for and update incentivised table. Add new table --- ...uquitsmokingresponse_value_incentivised.py | 35 ++++++ .../questions/models/__init__.py | 1 + .../questions/models/incentivised.py | 20 +++ .../export_email_addresses_for_incentives.sql | 117 ++++++++---------- 4 files changed, 108 insertions(+), 65 deletions(-) create mode 100644 lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py create mode 100644 lung_cancer_screening/questions/models/incentivised.py diff --git a/lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py b/lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py new file mode 100644 index 00000000..ae7bb0ae --- /dev/null +++ b/lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py @@ -0,0 +1,35 @@ +# Generated by Django 6.0.3 on 2026-04-13 15:15 + +import django.core.validators +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('questions', '0011_alter_whenyouquitsmokingresponse_response_set'), + ] + + operations = [ + migrations.AlterField( + model_name='whenyouquitsmokingresponse', + name='value', + field=models.IntegerField(validators=[django.core.validators.MinValueValidator(1)]), + ), + migrations.CreateModel( + name='Incentivised', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('incentivised_at', models.DateTimeField(auto_now_add=True)), + ('response_set', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='incentivised_record', to='questions.responseset')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='incentivised_records', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/lung_cancer_screening/questions/models/__init__.py b/lung_cancer_screening/questions/models/__init__.py index 97c0536a..c08f3b0a 100644 --- a/lung_cancer_screening/questions/models/__init__.py +++ b/lung_cancer_screening/questions/models/__init__.py @@ -12,6 +12,7 @@ from .gender_response import GenderResponse # noqa: F401 from .have_you_ever_smoked_response import HaveYouEverSmokedResponse # noqa: F401 from .height_response import HeightResponse # noqa: F401 +from .incentivised import Incentivised # noqa: F401 from .periods_when_you_stopped_smoking_response import PeriodsWhenYouStoppedSmokingResponse # noqa: F401 from .relatives_age_when_diagnosed_response import RelativesAgeWhenDiagnosedResponse # noqa: F401 from .respiratory_conditions_response import RespiratoryConditionsResponse # noqa: F401 diff --git a/lung_cancer_screening/questions/models/incentivised.py b/lung_cancer_screening/questions/models/incentivised.py new file mode 100644 index 00000000..92dd33d1 --- /dev/null +++ b/lung_cancer_screening/questions/models/incentivised.py @@ -0,0 +1,20 @@ + +from django.db import models + +from .base import BaseModel +from .response_set import ResponseSet + + +class Incentivised(BaseModel): + user = models.ForeignKey('questions.User', on_delete=models.CASCADE, related_name='incentivised_records') + response_set = models.OneToOneField(ResponseSet, on_delete=models.CASCADE, related_name='incentivised_record') + + incentivised_at = models.DateTimeField(auto_now_add=True) + +class Meta: + constraints = [ + models.UniqueConstraint( + fields=["user"], + name="unique_incentive_per_user", + ) + ] diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index 4d4c3cd5..95c01420 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -1,10 +1,11 @@ --- This script: +-- Commands are written on one line for psql execution, can be formatted for readability when running in a SQL client. +-- This script performs the following steps: -- 1 - creates a list of email addresses of participants eligible for incentives --- 2 - updates participants with incentives so that they are timestamped as having received an incentive +-- 2 - updates participants that have been exported so that they are timestamped as having received an incentive -- 3 Incentive eligibility is based on: -- - having submitted the questionnaire -- - not having already received an incentive --- - Submitted online questionnaire before telephone questionnaire conducted date +-- - Online submitted_at date is earlier than telephone questionnaire date_conducted. -- Find container name: podman ps --format "{{.Names}}" @@ -14,65 +15,51 @@ podman cp "/Users/*YourUsername*/Downloads/SampleDataPIDRemoved230126.csv" qrs.submitted_at)) TO 'C:/Users/*YourUsername*/Documents/eligible_participants_export.csv' WITH (FORMAT csv, HEADER true); +-- Create temp table for partner data import (can be permanent) +-- importing all date from sample file or test - would we want to do this with live data? Do we need all columns? + +-- single line command for psql to Create table +CREATE TEMP TABLE tmp_incentive_partner_import_raw (nhs_number TEXT, date_of_birth TEXT, date_conducted TEXT, smoking_status TEXT, average_cigarettes_per_day_while_smoking TEXT, duration_smoked_years TEXT, years_since_quitting_smoking TEXT, height_measurement_type TEXT, height_measurement_value_metric_cm TEXT, weight_measurement_type TEXT, weight_measurement_value_metric_kg TEXT, previous_respiratory_diagnosis TEXT, personal_history_of_previous_cancer TEXT, family_history_of_lung_cancer TEXT, personal_history_of_asthma TEXT, asbestos_exposure_from_job_or_activity TEXT, education TEXT, ethnicity TEXT, plco_lung_cancer_risk_score TEXT, llp_lung_cancer_risk_score TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT now()); + +-- psql command to import data from CSV file into the temp table - currently 40 rows of test data in sample file: +-- One line command for psql to import data from CSV file into the temp table: +\copy tmp_incentive_partner_import_raw (nhs_number, date_of_birth, date_conducted, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) FROM '/tmp/partner.csv' WITH (FORMAT csv, HEADER true); + + +-- TRANSACTION START for exporting eligible participants for incentives and updating incentivised table. +\r +BEGIN; +CREATE TEMP TABLE tmp_eligible_incentive_export AS SELECT DISTINCT ON (qrs.id) qu.id AS user_id, qrs.id AS response_set_id, qu.email FROM questions_user qu JOIN questions_responseset qrs ON qrs.user_id = qu.id JOIN tmp_incentive_partner_import_raw ipir ON ipir.nhs_number = qu.nhs_number LEFT JOIN questions_incentivised qi ON qi.response_set_id = qrs.id WHERE to_timestamp(NULLIF(ipir.date_conducted, ''), 'DD/MM/YYYY HH24:MI')::timestamptz > qrs.submitted_at::timestamptz AND qi.id IS NULL ORDER BY qrs.id, qrs.submitted_at DESC; +\copy (SELECT email FROM tmp_eligible_incentive_export ORDER BY email) TO '/tmp/eligible_participants_export.csv' WITH (FORMAT csv, HEADER true); +INSERT INTO questions_incentivised (created_at, updated_at, incentivised_at, user_id, response_set_id) SELECT now(), now(), now(), user_id, response_set_id FROM tmp_eligible_incentive_export; +SELECT count(*) AS rows_exported_and_marked FROM tmp_eligible_incentive_export; +-- COMMIT; +-- ROLLBACK; + +-- TRANSACTION END + +-- DELETE temp tables +DROP TABLE IF EXISTS tmp_eligible_incentive_export; +DROP TABLE IF EXISTS tmp_incentive_partner_import_raw; + + +-- Test data creation for testing incentive logic - creating 40 test users with varying submitted_at dates and response_set_ids to test export and incentivised table update logic. +-- This has been included for completeness, can delete before full PR. +BEGIN; + +DELETE FROM questions_responseset WHERE user_id IN (SELECT id FROM questions_user WHERE nhs_number BETWEEN '11111111' AND '11111150'); + +INSERT INTO questions_user (password, last_login, sub, nhs_number, given_name, family_name, email, created_at, updated_at) SELECT '!' || md5('seed-' || gs::text), NULL, 'seed-' || gs::text, gs::text, 'Test', 'User' || right(gs::text, 2), 'test.' || gs::text || '@example.com', now(), now() FROM generate_series(11111111::bigint, 11111150::bigint) gs WHERE NOT EXISTS (SELECT 1 FROM questions_user WHERE nhs_number = gs::text); + +INSERT INTO questions_responseset (created_at, updated_at, submitted_at, user_id) SELECT now(), now(), CASE (row_number() OVER (ORDER BY qu.nhs_number)) % 3 WHEN 0 THEN NULL WHEN 1 THEN now() ELSE timestamp '2023-06-01 09:00:00' + (((row_number() OVER (ORDER BY qu.nhs_number)) * 7) || ' days')::interval END, qu.id FROM questions_user qu WHERE qu.nhs_number BETWEEN '11111111' AND '11111150'; + +INSERT INTO questions_dateofbirthresponse (created_at, updated_at, value, response_set_id) SELECT now(), now(), (date '1960-01-01' + ((row_number() OVER (ORDER BY qu.nhs_number)) * 30 || ' days')::interval)::date, qrs.id FROM questions_responseset qrs JOIN questions_user qu ON qu.id = qrs.user_id WHERE qu.nhs_number BETWEEN '11111111' AND '11111150'; + +--COMMIT; +--ROLLBACK; + +-- clean up test data + +DELETE FROM questions_responseset WHERE user_id IN (SELECT id FROM questions_user WHERE nhs_number BETWEEN '11111111' AND '11111150'); +DELETE FROM questions_user WHERE user_id IN (SELECT id FROM questions_user WHERE nhs_number BETWEEN '11111111' AND '11111150'); + From 61c3c35df9b6f7781631231b1ecdc4f6e21c4184 Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Tue, 21 Apr 2026 13:27:32 +0100 Subject: [PATCH 05/11] fix: updates to scripts and docs --- .../export_email_addresses_for_incentives.sql | 90 ++++++++++--------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index 95c01420..3acc6257 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -1,65 +1,73 @@ -- Commands are written on one line for psql execution, can be formatted for readability when running in a SQL client. -- This script performs the following steps: --- 1 - creates a list of email addresses of participants eligible for incentives --- 2 - updates participants that have been exported so that they are timestamped as having received an incentive --- 3 Incentive eligibility is based on: +-- 1 - imports new data from csv file. The csv file is expected to be a full export of all partner records, but only new records will be inserted into the permanent table. +-- 2 - creates a list of email addresses of participants eligible for incentives +-- 3 - updates participants that have been exported so that they are timestamped as having received an incentive + +-- Incentive eligibility is based on: -- - having submitted the questionnaire -- - not having already received an incentive -- - Online submitted_at date is earlier than telephone questionnaire date_conducted. --- Find container name: -podman ps --format "{{.Names}}" --- Copy partner csv file to server temp directory -podman cp "/Users/*YourUsername*/Downloads/SampleDataPIDRemoved230126.csv" :/tmp/partner.csv +-- Partner import strategy: +-- The partner sends a full CSV export every week. Rather than replacing all data each run, we insert only +-- new records using a UNIQUE index on (nhs_number, conducted_at) and ON CONFLICT DO NOTHING. +-- Records removed from the partner's source will NOT be deleted. + + +-- Steps to follow: +-- 1. Log into AVD. +-- 2. Upload file to AVD +-- 3. Find file in RemoteVirtualDrive and copy to accessible location for psql COPY command. +-- 4. PATH_TO_FILE - search for this and replace with actual file path. +-- 5. Login to DB in AVD. + + +-- ============================================================ +-- RUN ONCE: Create permanent partner import table +-- Only run this block on first setup. The unique constraint on +-- (nhs_number, conducted_at) prevents duplicate rows being +-- inserted on subsequent weekly loads. +-- ============================================================ +CREATE TABLE IF NOT EXISTS incentive_partner_import (nhs_number TEXT, date_of_birth TEXT, date_conducted TEXT, conducted_at TIMESTAMPTZ, smoking_status TEXT, average_cigarettes_per_day_while_smoking TEXT, duration_smoked_years TEXT, years_since_quitting_smoking TEXT, height_measurement_type TEXT, height_measurement_value_metric_cm TEXT, weight_measurement_type TEXT, weight_measurement_value_metric_kg TEXT, previous_respiratory_diagnosis TEXT, personal_history_of_previous_cancer TEXT, family_history_of_lung_cancer TEXT, personal_history_of_asthma TEXT, asbestos_exposure_from_job_or_activity TEXT, education TEXT, ethnicity TEXT, plco_lung_cancer_risk_score TEXT, llp_lung_cancer_risk_score TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT uq_partner_nhs_conducted_at UNIQUE (nhs_number, conducted_at)); --- login to psql on local container +-- ============================================================ +-- RUN WEEKLY: Load new partner records from CSV +-- Step 1: Import CSV into a temporary staging table. +-- Step 2: Insert only rows where (nhs_number, conducted_at) +-- do not already exist in the permanent table. +-- Existing rows are silently skipped (DO NOTHING). +-- ============================================================ --- Create temp table for partner data import (can be permanent) --- importing all date from sample file or test - would we want to do this with live data? Do we need all columns? +-- Step 1: Create staging table and load CSV +CREATE TEMP TABLE tmp_incentive_partner_staging (nhs_number TEXT, date_of_birth TEXT, date_conducted TEXT, smoking_status TEXT, average_cigarettes_per_day_while_smoking TEXT, duration_smoked_years TEXT, years_since_quitting_smoking TEXT, height_measurement_type TEXT, height_measurement_value_metric_cm TEXT, weight_measurement_type TEXT, weight_measurement_value_metric_kg TEXT, previous_respiratory_diagnosis TEXT, personal_history_of_previous_cancer TEXT, family_history_of_lung_cancer TEXT, personal_history_of_asthma TEXT, asbestos_exposure_from_job_or_activity TEXT, education TEXT, ethnicity TEXT, plco_lung_cancer_risk_score TEXT, llp_lung_cancer_risk_score TEXT); --- single line command for psql to Create table -CREATE TEMP TABLE tmp_incentive_partner_import_raw (nhs_number TEXT, date_of_birth TEXT, date_conducted TEXT, smoking_status TEXT, average_cigarettes_per_day_while_smoking TEXT, duration_smoked_years TEXT, years_since_quitting_smoking TEXT, height_measurement_type TEXT, height_measurement_value_metric_cm TEXT, weight_measurement_type TEXT, weight_measurement_value_metric_kg TEXT, previous_respiratory_diagnosis TEXT, personal_history_of_previous_cancer TEXT, family_history_of_lung_cancer TEXT, personal_history_of_asthma TEXT, asbestos_exposure_from_job_or_activity TEXT, education TEXT, ethnicity TEXT, plco_lung_cancer_risk_score TEXT, llp_lung_cancer_risk_score TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT now()); +-- Copy data from file into staging table - update PATH_TO_FILE before running --- psql command to import data from CSV file into the temp table - currently 40 rows of test data in sample file: --- One line command for psql to import data from CSV file into the temp table: -\copy tmp_incentive_partner_import_raw (nhs_number, date_of_birth, date_conducted, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) FROM '/tmp/partner.csv' WITH (FORMAT csv, HEADER true); +\copy tmp_incentive_partner_staging (nhs_number, date_of_birth, date_conducted, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) FROM 'PATH_TO_FILE' WITH (FORMAT csv, HEADER true); + +-- Step 2: Insert only new records; skip any (nhs_number, conducted_at) already present +INSERT INTO incentive_partner_import (nhs_number, date_of_birth, date_conducted, conducted_at, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) SELECT nhs_number, date_of_birth, date_conducted, to_timestamp(NULLIF(date_conducted, ''), 'DD/MM/YYYY HH24:MI')::timestamptz, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score FROM tmp_incentive_partner_staging ON CONFLICT (nhs_number, conducted_at) DO NOTHING; + +DROP TABLE IF EXISTS tmp_incentive_partner_staging; -- TRANSACTION START for exporting eligible participants for incentives and updating incentivised table. +-- Update PATH_TO_EXPORT_FILE before running. \r BEGIN; -CREATE TEMP TABLE tmp_eligible_incentive_export AS SELECT DISTINCT ON (qrs.id) qu.id AS user_id, qrs.id AS response_set_id, qu.email FROM questions_user qu JOIN questions_responseset qrs ON qrs.user_id = qu.id JOIN tmp_incentive_partner_import_raw ipir ON ipir.nhs_number = qu.nhs_number LEFT JOIN questions_incentivised qi ON qi.response_set_id = qrs.id WHERE to_timestamp(NULLIF(ipir.date_conducted, ''), 'DD/MM/YYYY HH24:MI')::timestamptz > qrs.submitted_at::timestamptz AND qi.id IS NULL ORDER BY qrs.id, qrs.submitted_at DESC; -\copy (SELECT email FROM tmp_eligible_incentive_export ORDER BY email) TO '/tmp/eligible_participants_export.csv' WITH (FORMAT csv, HEADER true); +CREATE TEMP TABLE tmp_eligible_incentive_export AS WITH canonical_users AS (SELECT DISTINCT ON (nhs_number) id, email, nhs_number FROM questions_user WHERE nhs_number IS NOT NULL ORDER BY nhs_number, created_at DESC) SELECT DISTINCT ON (qrs.id) cu.id AS user_id, qrs.id AS response_set_id, cu.email FROM canonical_users cu JOIN questions_responseset qrs ON qrs.user_id = cu.id JOIN incentive_partner_import ipi ON ipi.nhs_number = cu.nhs_number LEFT JOIN questions_incentivised qi ON qi.response_set_id = qrs.id WHERE ipi.conducted_at > qrs.submitted_at::timestamptz AND qi.id IS NULL ORDER BY qrs.id, qrs.submitted_at DESC; +\copy (SELECT email FROM tmp_eligible_incentive_export ORDER BY email) TO 'PATH_TO_EXPORT_FILE' WITH (FORMAT csv, HEADER true); INSERT INTO questions_incentivised (created_at, updated_at, incentivised_at, user_id, response_set_id) SELECT now(), now(), now(), user_id, response_set_id FROM tmp_eligible_incentive_export; SELECT count(*) AS rows_exported_and_marked FROM tmp_eligible_incentive_export; + +-- I happy with the Select result type COMMIT; if not, ROLLBACK; to undo changes; + -- COMMIT; -- ROLLBACK; -- TRANSACTION END --- DELETE temp tables +-- DELETE temp table DROP TABLE IF EXISTS tmp_eligible_incentive_export; -DROP TABLE IF EXISTS tmp_incentive_partner_import_raw; - - --- Test data creation for testing incentive logic - creating 40 test users with varying submitted_at dates and response_set_ids to test export and incentivised table update logic. --- This has been included for completeness, can delete before full PR. -BEGIN; - -DELETE FROM questions_responseset WHERE user_id IN (SELECT id FROM questions_user WHERE nhs_number BETWEEN '11111111' AND '11111150'); - -INSERT INTO questions_user (password, last_login, sub, nhs_number, given_name, family_name, email, created_at, updated_at) SELECT '!' || md5('seed-' || gs::text), NULL, 'seed-' || gs::text, gs::text, 'Test', 'User' || right(gs::text, 2), 'test.' || gs::text || '@example.com', now(), now() FROM generate_series(11111111::bigint, 11111150::bigint) gs WHERE NOT EXISTS (SELECT 1 FROM questions_user WHERE nhs_number = gs::text); - -INSERT INTO questions_responseset (created_at, updated_at, submitted_at, user_id) SELECT now(), now(), CASE (row_number() OVER (ORDER BY qu.nhs_number)) % 3 WHEN 0 THEN NULL WHEN 1 THEN now() ELSE timestamp '2023-06-01 09:00:00' + (((row_number() OVER (ORDER BY qu.nhs_number)) * 7) || ' days')::interval END, qu.id FROM questions_user qu WHERE qu.nhs_number BETWEEN '11111111' AND '11111150'; - -INSERT INTO questions_dateofbirthresponse (created_at, updated_at, value, response_set_id) SELECT now(), now(), (date '1960-01-01' + ((row_number() OVER (ORDER BY qu.nhs_number)) * 30 || ' days')::interval)::date, qrs.id FROM questions_responseset qrs JOIN questions_user qu ON qu.id = qrs.user_id WHERE qu.nhs_number BETWEEN '11111111' AND '11111150'; - ---COMMIT; ---ROLLBACK; - --- clean up test data - -DELETE FROM questions_responseset WHERE user_id IN (SELECT id FROM questions_user WHERE nhs_number BETWEEN '11111111' AND '11111150'); -DELETE FROM questions_user WHERE user_id IN (SELECT id FROM questions_user WHERE nhs_number BETWEEN '11111111' AND '11111150'); - From 95f4524d555a63f69af3e28c3b885ae985d8f32b Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:40:11 +0100 Subject: [PATCH 06/11] fix: typso and renaming tables --- .../sql/export_email_addresses_for_incentives.sql | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index 3acc6257..6579e2fb 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -17,7 +17,7 @@ -- Steps to follow: -- 1. Log into AVD. --- 2. Upload file to AVD +-- 2. Upload csv file to AVD -- 3. Find file in RemoteVirtualDrive and copy to accessible location for psql COPY command. -- 4. PATH_TO_FILE - search for this and replace with actual file path. -- 5. Login to DB in AVD. @@ -29,7 +29,7 @@ -- (nhs_number, conducted_at) prevents duplicate rows being -- inserted on subsequent weekly loads. -- ============================================================ -CREATE TABLE IF NOT EXISTS incentive_partner_import (nhs_number TEXT, date_of_birth TEXT, date_conducted TEXT, conducted_at TIMESTAMPTZ, smoking_status TEXT, average_cigarettes_per_day_while_smoking TEXT, duration_smoked_years TEXT, years_since_quitting_smoking TEXT, height_measurement_type TEXT, height_measurement_value_metric_cm TEXT, weight_measurement_type TEXT, weight_measurement_value_metric_kg TEXT, previous_respiratory_diagnosis TEXT, personal_history_of_previous_cancer TEXT, family_history_of_lung_cancer TEXT, personal_history_of_asthma TEXT, asbestos_exposure_from_job_or_activity TEXT, education TEXT, ethnicity TEXT, plco_lung_cancer_risk_score TEXT, llp_lung_cancer_risk_score TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT uq_partner_nhs_conducted_at UNIQUE (nhs_number, conducted_at)); +CREATE TABLE IF NOT EXISTS inhealth_partner_data (nhs_number TEXT, date_of_birth TEXT, date_conducted TEXT, conducted_at TIMESTAMPTZ, smoking_status TEXT, average_cigarettes_per_day_while_smoking TEXT, duration_smoked_years TEXT, years_since_quitting_smoking TEXT, height_measurement_type TEXT, height_measurement_value_metric_cm TEXT, weight_measurement_type TEXT, weight_measurement_value_metric_kg TEXT, previous_respiratory_diagnosis TEXT, personal_history_of_previous_cancer TEXT, family_history_of_lung_cancer TEXT, personal_history_of_asthma TEXT, asbestos_exposure_from_job_or_activity TEXT, education TEXT, ethnicity TEXT, plco_lung_cancer_risk_score TEXT, llp_lung_cancer_risk_score TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT now(), CONSTRAINT uq_partner_nhs_conducted_at UNIQUE (nhs_number, conducted_at)); -- ============================================================ @@ -48,8 +48,9 @@ CREATE TEMP TABLE tmp_incentive_partner_staging (nhs_number TEXT, date_of_birth \copy tmp_incentive_partner_staging (nhs_number, date_of_birth, date_conducted, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) FROM 'PATH_TO_FILE' WITH (FORMAT csv, HEADER true); -- Step 2: Insert only new records; skip any (nhs_number, conducted_at) already present -INSERT INTO incentive_partner_import (nhs_number, date_of_birth, date_conducted, conducted_at, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) SELECT nhs_number, date_of_birth, date_conducted, to_timestamp(NULLIF(date_conducted, ''), 'DD/MM/YYYY HH24:MI')::timestamptz, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score FROM tmp_incentive_partner_staging ON CONFLICT (nhs_number, conducted_at) DO NOTHING; +INSERT INTO inhealth_partner_data (nhs_number, date_of_birth, date_conducted, conducted_at, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) SELECT nhs_number, date_of_birth, date_conducted, to_timestamp(NULLIF(date_conducted, ''), 'DD/MM/YYYY HH24:MI')::timestamptz, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score FROM tmp_incentive_partner_staging ON CONFLICT (nhs_number, conducted_at) DO NOTHING; +-- Delete temporary staging table DROP TABLE IF EXISTS tmp_incentive_partner_staging; @@ -57,17 +58,17 @@ DROP TABLE IF EXISTS tmp_incentive_partner_staging; -- Update PATH_TO_EXPORT_FILE before running. \r BEGIN; -CREATE TEMP TABLE tmp_eligible_incentive_export AS WITH canonical_users AS (SELECT DISTINCT ON (nhs_number) id, email, nhs_number FROM questions_user WHERE nhs_number IS NOT NULL ORDER BY nhs_number, created_at DESC) SELECT DISTINCT ON (qrs.id) cu.id AS user_id, qrs.id AS response_set_id, cu.email FROM canonical_users cu JOIN questions_responseset qrs ON qrs.user_id = cu.id JOIN incentive_partner_import ipi ON ipi.nhs_number = cu.nhs_number LEFT JOIN questions_incentivised qi ON qi.response_set_id = qrs.id WHERE ipi.conducted_at > qrs.submitted_at::timestamptz AND qi.id IS NULL ORDER BY qrs.id, qrs.submitted_at DESC; +CREATE TEMP TABLE tmp_eligible_incentive_export AS WITH canonical_users AS (SELECT DISTINCT ON (nhs_number) id, email, nhs_number FROM questions_user WHERE nhs_number IS NOT NULL ORDER BY nhs_number, created_at DESC) SELECT DISTINCT ON (qrs.id) cu.id AS user_id, qrs.id AS response_set_id, cu.email FROM canonical_users cu JOIN questions_responseset qrs ON qrs.user_id = cu.id JOIN inhealth_partner_data ipd ON ipd.nhs_number = cu.nhs_number LEFT JOIN questions_incentivised qi ON qi.response_set_id = qrs.id WHERE ipd.conducted_at > qrs.submitted_at::timestamptz AND qi.id IS NULL ORDER BY qrs.id, qrs.submitted_at DESC; \copy (SELECT email FROM tmp_eligible_incentive_export ORDER BY email) TO 'PATH_TO_EXPORT_FILE' WITH (FORMAT csv, HEADER true); INSERT INTO questions_incentivised (created_at, updated_at, incentivised_at, user_id, response_set_id) SELECT now(), now(), now(), user_id, response_set_id FROM tmp_eligible_incentive_export; SELECT count(*) AS rows_exported_and_marked FROM tmp_eligible_incentive_export; --- I happy with the Select result type COMMIT; if not, ROLLBACK; to undo changes; +-- If happy with the Select result type COMMIT; if not, ROLLBACK; to undo changes; -- COMMIT; -- ROLLBACK; -- TRANSACTION END --- DELETE temp table +-- DELETE tempprary export table DROP TABLE IF EXISTS tmp_eligible_incentive_export; From ed42261fe11c62383b9bd521fa812fdcd963071a Mon Sep 17 00:00:00 2001 From: Steph Housden <167300771+stephhou@users.noreply.github.com> Date: Wed, 22 Apr 2026 10:39:44 +0100 Subject: [PATCH 07/11] Update lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Steph Housden <167300771+stephhou@users.noreply.github.com> --- ...012_alter_whenyouquitsmokingresponse_value_incentivised.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py b/lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py index ae7bb0ae..6256301e 100644 --- a/lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py +++ b/lung_cancer_screening/questions/migrations/0012_alter_whenyouquitsmokingresponse_value_incentivised.py @@ -32,4 +32,8 @@ class Migration(migrations.Migration): 'abstract': False, }, ), + migrations.AddConstraint( + model_name='incentivised', + constraint=models.UniqueConstraint(fields=('user',), name='questions_incentivised_unique_user'), + ), ] From 99c5269da083b02d047099390b4ad6a2bdefdcf7 Mon Sep 17 00:00:00 2001 From: Steph Housden <167300771+stephhou@users.noreply.github.com> Date: Wed, 22 Apr 2026 10:40:24 +0100 Subject: [PATCH 08/11] Update lung_cancer_screening/questions/models/incentivised.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Steph Housden <167300771+stephhou@users.noreply.github.com> --- .../questions/models/incentivised.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lung_cancer_screening/questions/models/incentivised.py b/lung_cancer_screening/questions/models/incentivised.py index 92dd33d1..133712a9 100644 --- a/lung_cancer_screening/questions/models/incentivised.py +++ b/lung_cancer_screening/questions/models/incentivised.py @@ -11,10 +11,10 @@ class Incentivised(BaseModel): incentivised_at = models.DateTimeField(auto_now_add=True) -class Meta: - constraints = [ - models.UniqueConstraint( - fields=["user"], - name="unique_incentive_per_user", - ) - ] + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["user"], + name="unique_incentive_per_user", + ) + ] From f1fb4056b8317ccbf11a9d1283439e462c582795 Mon Sep 17 00:00:00 2001 From: Steph Housden <167300771+stephhou@users.noreply.github.com> Date: Wed, 22 Apr 2026 10:40:36 +0100 Subject: [PATCH 09/11] Update scripts/sql/export_email_addresses_for_incentives.sql Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Steph Housden <167300771+stephhou@users.noreply.github.com> --- scripts/sql/export_email_addresses_for_incentives.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index 6579e2fb..81518623 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -70,5 +70,5 @@ SELECT count(*) AS rows_exported_and_marked FROM tmp_eligible_incentive_export; -- TRANSACTION END --- DELETE tempprary export table +-- DELETE temporary export table DROP TABLE IF EXISTS tmp_eligible_incentive_export; From c57edb3ee133f0cbd7d691f11dc713eae98b5f08 Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:25:09 +0100 Subject: [PATCH 10/11] fix: Update to distinct NHS number --- scripts/sql/export_email_addresses_for_incentives.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index 81518623..0cbee522 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -1,4 +1,5 @@ -- Commands are written on one line for psql execution, can be formatted for readability when running in a SQL client. + -- This script performs the following steps: -- 1 - imports new data from csv file. The csv file is expected to be a full export of all partner records, but only new records will be inserted into the permanent table. -- 2 - creates a list of email addresses of participants eligible for incentives @@ -47,7 +48,8 @@ CREATE TEMP TABLE tmp_incentive_partner_staging (nhs_number TEXT, date_of_birth \copy tmp_incentive_partner_staging (nhs_number, date_of_birth, date_conducted, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) FROM 'PATH_TO_FILE' WITH (FORMAT csv, HEADER true); --- Step 2: Insert only new records; skip any (nhs_number, conducted_at) already present +-- Step 2: Insert all rows from staging. Existing rows with the same (nhs_number, conducted_at) +-- are skipped. INSERT INTO inhealth_partner_data (nhs_number, date_of_birth, date_conducted, conducted_at, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) SELECT nhs_number, date_of_birth, date_conducted, to_timestamp(NULLIF(date_conducted, ''), 'DD/MM/YYYY HH24:MI')::timestamptz, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score FROM tmp_incentive_partner_staging ON CONFLICT (nhs_number, conducted_at) DO NOTHING; -- Delete temporary staging table @@ -58,7 +60,7 @@ DROP TABLE IF EXISTS tmp_incentive_partner_staging; -- Update PATH_TO_EXPORT_FILE before running. \r BEGIN; -CREATE TEMP TABLE tmp_eligible_incentive_export AS WITH canonical_users AS (SELECT DISTINCT ON (nhs_number) id, email, nhs_number FROM questions_user WHERE nhs_number IS NOT NULL ORDER BY nhs_number, created_at DESC) SELECT DISTINCT ON (qrs.id) cu.id AS user_id, qrs.id AS response_set_id, cu.email FROM canonical_users cu JOIN questions_responseset qrs ON qrs.user_id = cu.id JOIN inhealth_partner_data ipd ON ipd.nhs_number = cu.nhs_number LEFT JOIN questions_incentivised qi ON qi.response_set_id = qrs.id WHERE ipd.conducted_at > qrs.submitted_at::timestamptz AND qi.id IS NULL ORDER BY qrs.id, qrs.submitted_at DESC; +CREATE TEMP TABLE tmp_eligible_incentive_export AS WITH canonical_users AS (SELECT DISTINCT ON (nhs_number) id, email, nhs_number FROM questions_user WHERE nhs_number IS NOT NULL ORDER BY nhs_number ASC, created_at DESC) SELECT DISTINCT ON (cu.nhs_number) cu.id AS user_id, qrs.id AS response_set_id, cu.email FROM canonical_users cu JOIN questions_responseset qrs ON qrs.user_id = cu.id JOIN inhealth_partner_data ipd ON ipd.nhs_number = cu.nhs_number WHERE ipd.conducted_at > qrs.submitted_at::timestamptz AND NOT EXISTS (SELECT 1 FROM questions_incentivised qi JOIN questions_user iu ON iu.id = qi.user_id WHERE iu.nhs_number = cu.nhs_number) ORDER BY cu.nhs_number ASC, qrs.submitted_at DESC, qrs.id DESC; \copy (SELECT email FROM tmp_eligible_incentive_export ORDER BY email) TO 'PATH_TO_EXPORT_FILE' WITH (FORMAT csv, HEADER true); INSERT INTO questions_incentivised (created_at, updated_at, incentivised_at, user_id, response_set_id) SELECT now(), now(), now(), user_id, response_set_id FROM tmp_eligible_incentive_export; SELECT count(*) AS rows_exported_and_marked FROM tmp_eligible_incentive_export; From 0eea078f8464c23a124185d22a21eb78ef2adea0 Mon Sep 17 00:00:00 2001 From: stephhou <167300771+stephhou@users.noreply.github.com> Date: Fri, 24 Apr 2026 18:49:32 +0100 Subject: [PATCH 11/11] fix: added first and last name, edited constraint to match pattern --- ...tions_incentivised_unique_user_and_more.py | 21 +++++++++++++++++++ .../export_email_addresses_for_incentives.sql | 5 +++-- 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 lung_cancer_screening/questions/migrations/0013_remove_incentivised_questions_incentivised_unique_user_and_more.py diff --git a/lung_cancer_screening/questions/migrations/0013_remove_incentivised_questions_incentivised_unique_user_and_more.py b/lung_cancer_screening/questions/migrations/0013_remove_incentivised_questions_incentivised_unique_user_and_more.py new file mode 100644 index 00000000..8d1997b6 --- /dev/null +++ b/lung_cancer_screening/questions/migrations/0013_remove_incentivised_questions_incentivised_unique_user_and_more.py @@ -0,0 +1,21 @@ +# Generated by Django 6.0.4 on 2026-04-24 12:12 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('questions', '0012_alter_whenyouquitsmokingresponse_value_incentivised'), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='incentivised', + name='questions_incentivised_unique_user', + ), + migrations.AddConstraint( + model_name='incentivised', + constraint=models.UniqueConstraint(fields=('user',), name='unique_incentive_per_user'), + ), + ] diff --git a/scripts/sql/export_email_addresses_for_incentives.sql b/scripts/sql/export_email_addresses_for_incentives.sql index 0cbee522..3e46271e 100644 --- a/scripts/sql/export_email_addresses_for_incentives.sql +++ b/scripts/sql/export_email_addresses_for_incentives.sql @@ -58,10 +58,11 @@ DROP TABLE IF EXISTS tmp_incentive_partner_staging; -- TRANSACTION START for exporting eligible participants for incentives and updating incentivised table. -- Update PATH_TO_EXPORT_FILE before running. + \r BEGIN; -CREATE TEMP TABLE tmp_eligible_incentive_export AS WITH canonical_users AS (SELECT DISTINCT ON (nhs_number) id, email, nhs_number FROM questions_user WHERE nhs_number IS NOT NULL ORDER BY nhs_number ASC, created_at DESC) SELECT DISTINCT ON (cu.nhs_number) cu.id AS user_id, qrs.id AS response_set_id, cu.email FROM canonical_users cu JOIN questions_responseset qrs ON qrs.user_id = cu.id JOIN inhealth_partner_data ipd ON ipd.nhs_number = cu.nhs_number WHERE ipd.conducted_at > qrs.submitted_at::timestamptz AND NOT EXISTS (SELECT 1 FROM questions_incentivised qi JOIN questions_user iu ON iu.id = qi.user_id WHERE iu.nhs_number = cu.nhs_number) ORDER BY cu.nhs_number ASC, qrs.submitted_at DESC, qrs.id DESC; -\copy (SELECT email FROM tmp_eligible_incentive_export ORDER BY email) TO 'PATH_TO_EXPORT_FILE' WITH (FORMAT csv, HEADER true); +CREATE TEMP TABLE tmp_eligible_incentive_export AS WITH canonical_users AS (SELECT DISTINCT ON (nhs_number) id, email, given_name, family_name, nhs_number FROM questions_user WHERE nhs_number IS NOT NULL ORDER BY nhs_number ASC, created_at DESC) SELECT DISTINCT ON (cu.nhs_number) cu.id AS user_id, qrs.id AS response_set_id, cu.email, cu.given_name, cu.family_name FROM canonical_users cu JOIN questions_responseset qrs ON qrs.user_id = cu.id JOIN inhealth_partner_data ipd ON ipd.nhs_number = cu.nhs_number WHERE ipd.conducted_at > qrs.submitted_at::timestamptz AND NOT EXISTS (SELECT 1 FROM questions_incentivised qi JOIN questions_user iu ON iu.id = qi.user_id WHERE iu.nhs_number = cu.nhs_number) ORDER BY cu.nhs_number ASC, qrs.submitted_at DESC, qrs.id DESC; +\copy (SELECT email, given_name, family_name FROM tmp_eligible_incentive_export ORDER BY family_name) TO 'PATH_TO_EXPORT_FILE' WITH (FORMAT csv, HEADER true); INSERT INTO questions_incentivised (created_at, updated_at, incentivised_at, user_id, response_set_id) SELECT now(), now(), now(), user_id, response_set_id FROM tmp_eligible_incentive_export; SELECT count(*) AS rows_exported_and_marked FROM tmp_eligible_incentive_export;