diff --git a/.github/workflows/scrape.yml b/.github/workflows/scrape.yml new file mode 100644 index 000000000..c07e5852e --- /dev/null +++ b/.github/workflows/scrape.yml @@ -0,0 +1,28 @@ +on: + schedule: + # Run every day at 3am ET (7am UTC) + # See: https://crontab.guru/#0_7_*_*_* + - cron: "0 7 * * *" + push: + + +jobs: + scrape: + runs-on: ubuntu-latest + # Only run when scheduled, or if push has "[force ci]" in its last commit message. + if: "github.event_name == 'schedule' || contains(github.event.head_commit.message, '[force ci]')" + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v3 + with: + python-version: '3.10' + + - run: pip install --user pipenv + + - run: make run-code + + - uses: actions/upload-artifact@v3 + with: + name: url-list + path: list.csv diff --git a/.ipynb_checkpoints/MissingPersons_DataExtract_Tom_May10-checkpoint.ipynb b/.ipynb_checkpoints/MissingPersons_DataExtract_Tom_May10-checkpoint.ipynb index d11dd8651..eea65226f 100644 --- a/.ipynb_checkpoints/MissingPersons_DataExtract_Tom_May10-checkpoint.ipynb +++ b/.ipynb_checkpoints/MissingPersons_DataExtract_Tom_May10-checkpoint.ipynb @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "cf682cb3", "metadata": {}, "outputs": [], @@ -43,7 +43,25 @@ "from selenium import webdriver\n", "from selenium.webdriver.support.ui import Select\n", "from bs4 import BeautifulSoup as bs\n", - "import requests" + "import requests\n", + "import json\n", + "import csv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "554bc853", + "metadata": {}, + "outputs": [], + "source": [ + "from webdriver_manager.chrome import ChromeDriverManager\n", + "from selenium.webdriver.chrome.service import Service\n", + "from selenium.webdriver.chrome.options import Options\n", + "\n", + "# Allow Chrome to run in headless mode (required for running in GitHub Actions)\n", + "options = Options()\n", + "options.headless = True" ] }, { @@ -55,11 +73,10 @@ }, "outputs": [], "source": [ - "link = 'https://www.services.rcmp-grc.gc.ca/missing-disparus/search-recherche.jsf'\n", - "driver=r'C:\\Users\\Neptune\\Downloads\\chromedriver'\n", + "# Open Chrome browser. 
(get chrome driver if it isn't installed already)\n", + "browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)\n", "\n", - "# using the selenium web driver I downloaded\n", - "browser = webdriver.Chrome(driver)\n", + "link = 'https://www.services.rcmp-grc.gc.ca/missing-disparus/search-recherche.jsf'\n", "browser.get(link)\n", "time.sleep(2)\n", "\n", @@ -75,7 +92,7 @@ "except:\n", " print('could not find Search')\n", "\n", - "time.sleep(5)\n", + "time.sleep(3)\n", "\n", "not_last = True\n", "# to store all the URLs\n", @@ -107,7 +124,7 @@ " next_page = browser.find_element_by_xpath('/html/body/main/form/div[33]/ul/li[83]/a')\n", " print('Found next button to press.')\n", " next_page.click()\n", - " time.sleep(4) # wait for next page to load\n", + " time.sleep(2) # wait for next page to load\n", " except:\n", " # should not have a next button on the last page\n", " print('last page or no next button found!')\n", @@ -116,7 +133,12 @@ "# the final list\n", "print(\"================================== END ==================================\")\n", "print(len(URLs[:30])) # show a section \n", - "time.sleep(5)\n", + "time.sleep(2)\n", + "\n", + "# write progress to csv\n", + "df = pd.DataFrame(URLs, columns=[\"URL\"])\n", + "df.to_csv('list.csv', index=False)\n", + "\n", "browser.quit()" ] }, @@ -128,6 +150,14 @@ "#### Collect all the data from all the detailed case pages" ] }, + { + "cell_type": "markdown", + "id": "77947f1a", + "metadata": {}, + "source": [ + "This part of the code adapted from the CBC script" + ] + }, { "cell_type": "code", "execution_count": null, @@ -135,13 +165,377 @@ "metadata": {}, "outputs": [], "source": [ - "# loop through all the URLs and collect the data from each page\n", - "for url in URLs:\n", - " persons_page_url = 'https://www.services.rcmp-grc.gc.ca' + url" + "base_url = r'https://www.services.rcmp-grc.gc.ca'\n", + "\n", + "#CLEANING FUNCTION\n", + "def cleaning_function(item):\n", + " item = str(item)\n", + " item = item.replace(\"
\" , \"\")\n", + " item = item.replace(\"
\" , \"\")\n", + " item = item.replace(\"

\" , \"\")\n", + " item = item.replace(\"

\" , \"\")\n", + " item = item.replace(\"Missing from \" , \"\")\n", + " item = item.replace(\"\" , \"\")\n", + " item = item.replace(\"\" , \"\")\n", + " return item" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d72d3b0e", + "metadata": {}, + "outputs": [], + "source": [ + "#this is the list where all the URLs from the sheet will go\n", + "person_url_list = []\n", + "#this is where all the person info will go\n", + "person_info = []\n", + "\n", + "#a list for the sections later\n", + "section_list = []\n", + "\n", + "#I have this because I dont know how else to filter out stuff from an if statement that I dont want\n", + "count_working = 0\n", + "\n", + "for page_url in URLs:\n", + " print(\"Record Number: \" + str(count_working))\n", + " print(\"Case URL: \" + page_url)\n", + " count_working += 1\n", + " url = base_url + page_url\n", + " \n", + " # request the html\n", + " try:\n", + " page = requests.get(url, timeout = 10)\n", + " except requests.exceptions.Timeout:\n", + " print(\"Timeout occurred\")\n", + " # structure the page content for parsing\n", + " soup = bs(page.content, 'html.parser') \n", + " \n", + " # First we have to pull out the content area\n", + " content_area = soup.find('main' , {\"property\" : \"mainContentOfPage\"})\n", + " \n", + " # LOCATION\n", + " try:\n", + " location_scrape = content_area.find('div')\n", + " location_isolate = location_scrape.find_all('p')\n", + " location_string = str(location_isolate[2])\n", + " location_split = location_string.split(\",\")\n", + " province = cleaning_function(location_split[1])\n", + " city = cleaning_function(location_split[0])\n", + " except:\n", + " print('No Location')\n", + " \n", + " # STATUS\n", + " status_scrape = content_area.find_all('h2')\n", + " status = status_scrape[:1]\n", + " status = str(status)\n", + " front_of_status = status.index('

<h2>') + 4\n",
" back_of_status = status.index('</h2>
')\n", + " status_cleaned = (f'{status[front_of_status : back_of_status]}')\n", + " \n", + " # FOR THE MISSING ENTRIES\n", + " if 'Missing' in status_cleaned:\n", + " #Now we get into pulling out individual details which will eventually be compiled in a list\n", + " #NAME(MISSING)\n", + " name_scrape = content_area.find_all('h3')\n", + " person_name = name_scrape[:1]\n", + " person_name = str(person_name)\n", + " front_of_name = person_name.index('

<h3>') + 4\n",
" back_of_name = person_name.index('</h3>
')\n", + " name_cleaned = (f'{person_name[front_of_name : back_of_name]}')\n", + " name_split = name_cleaned.split(',')\n", + " last_name = name_split[0]\n", + " first_name = name_split[1]\n", + " first_name_string = str(name_split[1:2])\n", + " first_name_string = first_name_string.replace('[',\"\")\n", + " first_name_string = first_name_string.replace(']',\"\")\n", + " first_name_string = first_name_string.replace(\"'\",\"\")\n", + " first_name_string = first_name_string.replace(\"\\n\",\"\")\n", + " first_name_string = first_name_string.strip()\n", + " \n", + " #PERSON DETAILS(MISSING)\n", + " try:\n", + " person_details = content_area.find_all('dd')\n", + " date_missing_discovered = person_details[0]\n", + " year_born = person_details[1]\n", + " age_at_disappearance = person_details[2]\n", + " gender = person_details[3]\n", + " bio_group = person_details[4]\n", + " except:\n", + " print('Data error')\n", + "\n", + " #FOR THE UNIDENTIFIED ENTRIES\n", + " else:\n", + " try:\n", + " first_name_string = 'Unidentified'\n", + " last_name = 'Unidentified'\n", + " person_details = content_area.find_all('dd')\n", + " date_missing_discovered = person_details[0]\n", + " age_at_disappearance = person_details[1]\n", + " gender = person_details[2]\n", + " bio_group = person_details[3]\n", + " year_born = 'Unknown'\n", + " except:\n", + " print('Data error2')\n", + " \n", + " \n", + " #PUT IT ALL TOGETHER\n", + " person_info.append([first_name_string , last_name , status_cleaned , cleaning_function(date_missing_discovered) , cleaning_function(year_born) , cleaning_function(age_at_disappearance) , cleaning_function(gender) , cleaning_function(bio_group) , city , province , url])\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "30062b69", + "metadata": {}, + "source": [ + "### Save the file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b297ad3", + "metadata": {}, + "outputs": [], + "source": [ + "# print to a file\n", + "full_file = pd.DataFrame(person_info)\n", + "full_file.to_csv(\"output_rcmp.csv\")\n", + "print('Done')" + ] + }, + { + "cell_type": "markdown", + "id": "72e7fbf7", + "metadata": {}, + "source": [ + "### To Avoid Running the URL Collector Again - Run Code Below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e5e73d5", + "metadata": {}, + "outputs": [], + "source": [ + "with open('list.csv') as f:\n", + " allLines = f.readlines()\n", + " TempURLs = list(allLines)\n", + " # remove the column header\n", + " TempURLs = TempURLs[1:]\n", + " f.close()\n", + "\n", + "# clean the elements \n", + "URLs = []\n", + "for link in TempURLs:\n", + " URLs.append(link.strip())\n", + " \n", + "print(URLs[:10])" + ] + }, + { + "cell_type": "markdown", + "id": "32bc0a32", + "metadata": {}, + "source": [ + "#### Function to Turn DL sections into dict - No Longer Used!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40cad4bf", + "metadata": {}, + "outputs": [], + "source": [ + "def create_dl_dict(soup):\n", + " keys, values = [] , []\n", + " for dl in soup.find_all(\"dl\", {\"class\":\"dl-horizontal\"}):\n", + " for dt in dl.find_all(\"dt\"):\n", + " keys.append(dt.text.strip())\n", + " for dd in dl.find_all(\"dd\"):\n", + " values.append(dd.text.strip())\n", + " \n", + " return dict(zip(keys,values))" + ] + }, + { + "cell_type": "markdown", + "id": "5513dd44", + "metadata": {}, + "source": [ + "### Second Method - For More Structured Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f01a313d", + "metadata": {}, + "outputs": [], + "source": [ + "#I have this because I dont know how else to filter out stuff from an if statement that I dont want\n", + "count_working = 0\n", + "\n", + "# complete file\n", + "complete_db = {}\n", + "\n", + "# used to test\n", + "test_URLs = URLs[:2]\n", + "\n", + "# loop through all the URLs\n", + "for page_url in URLs:\n", + " \n", + " # page dict\n", + " page_dict = {}\n", + " #this is where all the person info will go\n", + " page_sections = []\n", + " # make the full URL\n", + " url = base_url + page_url\n", + " \n", + " print('==============================================')\n", + " print(\"Record Number: \" + str(count_working))\n", + " print(\"Case URL: \" + url)\n", + " \n", + " # next record\n", + " count_working += 1\n", + " \n", + " # request the html\n", + " try:\n", + " page = requests.get(url, timeout = 10)\n", + " except requests.exceptions.Timeout:\n", + " print(\"Timeout occurred\")\n", + " \n", + " # structure the page content for parsing\n", + " soup = bs(page.content, 'html.parser') \n", + " \n", + " #print(soup)\n", + " \n", + " # First we have to pull out the content area\n", + " content_area = soup.find('main' , {\"property\" : \"mainContentOfPage\"})\n", + " \n", + " try:\n", + " # the case reference number\n", + " _case_ref = content_area.find('h1')\n", + " page_dict['CaseRef'] = \" \".join(_case_ref.text.split())\n", + " \n", + " # the main section\n", + " sections = content_area.section\n", + " \n", + " # the description\n", + " desc = sections.div.p\n", + " page_dict['CaseDesc'] = desc.text.strip()\n", + " \n", + " # the category\n", + " case_type = sections.h2\n", + " page_dict['CaseType'] = \" \".join(case_type.text.split())\n", + " except:\n", + " print('page base info collection error')\n", + " \n", + " page_dict[\"CaseURL\"] = url\n", + " \n", + " \n", + " # find all the images in the persons section\n", + " try:\n", + " # the image link\n", + " images = sections.find_all('img')\n", + " imgs_list = []\n", + " for image in images:\n", + " image_src = image['src']\n", + " # check if this matches the no photo image\n", + " no_photo = re.search(\"noPhoto\\.png\", image_src)\n", + " if not no_photo:\n", + " # find the iamge ID\n", + " img_id = re.search(\"id=(\\d+).*\", image_src)\n", + " imgs_list.append(\"https://www.services.rcmp-grc.gc.ca/missing-disparus/showImage?\"+img_id.group())\n", + " # add the images section \n", + " # add to the main dict\n", + " page_dict['PageImages'] = imgs_list\n", + " except:\n", + " print(\"no images found\")\n", + " \n", + " \"\"\"\n", + " # if we need to treat the page types differently\n", + " if page_dict['CaseType'] == 'Missing':\n", + " \"\"\"\n", + " \n", + " # get the first section with all the persons\n", + " persons_section = sections.section\n", + " \n", + " # how many people are we looking 
through\n", + " persons_names = persons_section.find_all('h3')\n", + " num_persons = len(persons_names)\n", + " # all the blocks within the section\n", + " persons_blocks = persons_section.find_all('div',{\"class\":\"row\"})\n", + " \n", + " # loop through all the person sections to collect their data\n", + " # assigned to their names\n", + " for i in range(num_persons):\n", + " print(\"Person(s) in Case: \"+str(i+1))\n", + " block = {} # stores the individuals info, some pages have 1+\n", + " block['Name'] = \" \".join(persons_names[i].text.split())\n", + " \n", + " # select the current persion\n", + " current_person = persons_blocks[i]\n", + " \n", + " # takes all the DL sections out and saves them\n", + " dl_sections = []\n", + " for dl in current_person.find_all(\"dl\"):\n", + " dl_sections.append(str(dl))\n", + " block[\"InfoSection\"] = dl_sections \n", + " # add the block to the page sections\n", + " page_sections.append(block)\n", + " print(block['Name'])\n", + " #print(persons_blocks[i])\n", + " #print(block)\n", + " \n", + " \"\"\"\n", + " # If this is an unidentified persons record\n", + " else:\n", + " print(\"Un IDs Body\")\n", + " \n", + " \"\"\"\n", + " \n", + " # write the section to the dict\n", + " page_dict['PersonsData'] = page_sections\n", + " # write it all to the main DB\n", + " complete_db[page_dict['CaseRef']] = page_dict\n", + " \n", + "# write JSON to a file \n", + "with open(\"Complete_DB.json\", \"w\") as outfile:\n", + " json.dump(complete_db, outfile)\n", + " \n", + "print('======================= Done =======================')\n" + ] + }, + { + "cell_type": "markdown", + "id": "6123cd3b", + "metadata": {}, + "source": [ + "## Write to a file if it was not done above" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41cf2eed", + "metadata": {}, + "outputs": [], + "source": [ + "# write JSON to a file \n", + "with open(\"Complete_DB.json\", \"w\") as outfile:\n", + " json.dump(complete_db, outfile)" ] } ], "metadata": { + "interpreter": { + "hash": "2516770f1231e9470eebfb2e8e89faf6b4b2c173f0d9550afe423a3c1e5f866c" + }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", @@ -157,7 +551,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/Makefile b/Makefile index bbfa07f2f..e943c1faf 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,12 @@ -setup: ## Install packages via pipenv +setup: pipenv install -open: ## Start the Jupyter notebook +open-viewer: setup ## Open the notebook in an interactive viewer pipenv run jupyter notebook MissingPersons_DataExtract_Tom_May10.ipynb --config jupyter_notebook_config.py +run-code: setup ## Run the notebook's code in terminal (without viewer) + pipenv run jupyter nbconvert --to notebook --execute MissingPersons_DataExtract_Tom_May10.ipynb + %: @true diff --git a/MissingPersons_DataExtract_Tom_May10.ipynb b/MissingPersons_DataExtract_Tom_May10.ipynb index 3f4355851..a57d81494 100644 --- a/MissingPersons_DataExtract_Tom_May10.ipynb +++ b/MissingPersons_DataExtract_Tom_May10.ipynb @@ -46,7 +46,6 @@ "import requests\n", "import json\n", "import csv\n", - "from webdriver_manager.chrome import ChromeDriverManager\n", "from selenium.webdriver.common.by import By" ] }, @@ -57,9 +56,13 @@ "metadata": {}, "outputs": [], "source": [ - "link = 'https://www.services.rcmp-grc.gc.ca/missing-disparus/search-recherche.jsf'\n", - "# Get a chrome driver if there isn't one locally\n", - 
"browser = webdriver.Chrome(ChromeDriverManager().install()) " + "from webdriver_manager.chrome import ChromeDriverManager\n", + "from selenium.webdriver.chrome.service import Service\n", + "from selenium.webdriver.chrome.options import Options\n", + "\n", + "# Allow Chrome to run in headless mode (required for running in GitHub Actions)\n", + "options = Options()\n", + "options.headless = True" ] }, { @@ -71,6 +74,10 @@ }, "outputs": [], "source": [ + "# Open Chrome browser. (get chrome driver if it isn't installed already)\n", + "browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)\n", + "\n", + "link = 'https://www.services.rcmp-grc.gc.ca/missing-disparus/search-recherche.jsf'\n", "browser.get(link)\n", "time.sleep(2)\n", "\n", diff --git a/README.md b/README.md index 96a9eb920..ec8a14532 100644 --- a/README.md +++ b/README.md @@ -37,13 +37,37 @@ Recommendations: - e.g., `brew install pyenv` - then `pyenv install --skip-existing` within this project, to install a known-good version of Python. +To list all available commands, run `make` (without any arguments): + ``` -make setup -make install +$ make +Usage: make + +where is one of the following: + +open-viewer Open the notebook in an interactive viewer +run-code Run the notebook's code in terminal (without viewer) ``` Note: If you don't wish to use `pipenv`, just open up `Makefile` and run its commands directly. +## Scheduled Cloud Run + +This scraper is intended to be run nightly in a cloud environment. We use GitHub Actions for this. + +Configuration: [`.github/workflows/scrape.yml`](/.github/workflows/scrape.yml) +Run History: [GitHub Action runs](https://github.com/CivicTechTO/missing-persons/actions/workflows/scrape.yml) + +Each successful script run in the cloud environment will save a zip file of the +generated files. You can find this on the "Summary" page of any workflow run. + +For example: +https://github.com/CivicTechTO/missing-persons/actions/runs/2382053305 + +While this workflow will run automatically each night, you can force a run +anytime. To do this, push to any branch a commit with `[force ci]` in the most +recent commit message. + ### 1. Web Scraping - Setting up Jupyter Notebooks & Selenium The required packages can be found in requirments.txt, bs4 and selenium are the main non-standard packages needed.