From 1981d1e1002b4cf2155033182bf186e0a14bf959 Mon Sep 17 00:00:00 2001 From: Sanika Vaze Date: Thu, 24 Jul 2025 11:34:36 -0700 Subject: [PATCH 1/3] Fix cell execution order in 01-clean-country-socioecon-data.ipynb --- .../01-clean-country-socioecon-data.ipynb | 154 +++++++++--------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb b/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb index 4dc6370..4fed20b 100644 --- a/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb +++ b/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb @@ -113,6 +113,82 @@ "this = gpd.GeoDataFrame(geometry=[read_shapefile(sset.PATH_GLOBAL_MANGROVES_RAW).make_valid().unary_union], crs=4326).to_parquet(sset.PATH_GLOBAL_MANGROVES_INT)" ] }, + { + "cell_type": "markdown", + "id": "3207b1e9-62fd-43b1-8265-a923d4ab8eb7", + "metadata": {}, + "source": [ + "## Get country codes from GADM" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "994246ad-0616-43c7-a4b8-675c2a2e1d65", + "metadata": {}, + "outputs": [], + "source": [ + "ccode_mapping = (\n", + " read_shapefile(sset.PATH_GADM, layer=0)\n", + " .set_index(\"COUNTRY\")\n", + " .GID_0.rename(\"ccode\")\n", + " .rename_axis(\"name\")\n", + ")\n", + "# drop numerical china/india/pakistan GID_0's\n", + "ccode_mapping = ccode_mapping[ccode_mapping.str[1] != \"0\"]\n", + "\n", + "# add on manaully added segments (which account for uninhabited areas not in GADM)\n", + "ccode_mapping = pd.concat(\n", + " (ccode_mapping, pd.read_parquet(sset.PATH_SEG_PTS_MANUAL).ccode)\n", + ")\n", + "\n", + "# add some manual mappers\n", + "# Netherlands Antilles in CIA WFB corresponds to these three (not ABW)\n", + "manual = sset.CCODE_MANUAL.copy()\n", + "manual[\"Netherlands Antilles\"] = \"BES+CUW+SXM\"\n", + "ccode_mapping = pd.concat([ccode_mapping, manual]).sort_index()\n", + "\n", + "# Handle no-accent names\n", + "alt_index = (\n", + " 
ccode_mapping.index.str.normalize(\"NFKD\")\n", + " .str.encode(\"ascii\", errors=\"ignore\")\n", + " .astype(str)\n", + ")\n", + "alt = pd.Series(ccode_mapping.values, index=alt_index, name=\"ccode\")\n", + "ccode_mapping = (\n", + " pd.concat((ccode_mapping, alt))\n", + " .reset_index()\n", + " .drop_duplicates()\n", + " .set_index(\"name\")\n", + " .ccode.sort_index()\n", + ")\n", + "\n", + "# getting list of valid ccodes including some previously uncaptured mixtures (i.e.\n", + "# France + overseas depts)\n", + "valid_ccodes = np.setdiff1d(\n", + " np.unique(\n", + " np.concatenate(\n", + " (\n", + " ccode_mapping.unique(),\n", + " [k for v in sset.PPP_CCODE_IF_MSNG.values() for k in v],\n", + " )\n", + " )\n", + " ),\n", + " sset.EXCLUDED_ISOS,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f0525373-d81a-4710-b729-de857effa1ef", + "metadata": {}, + "outputs": [], + "source": [ + "save(pd.DataFrame({\"ccode\": valid_ccodes}), sset.PATH_ALL_VALID_HIST_CCODES)\n", + "save(ccode_mapping.to_frame(), sset.PATH_HIST_CCODE_MAPPING)" + ] + }, { "cell_type": "markdown", "id": "530700ee-d556-45c8-a254-1333a3a459c7", @@ -242,82 +318,6 @@ "save_geoparquet(adm1, sset.PATH_GADM_ADM1_INT)" ] }, - { - "cell_type": "markdown", - "id": "3207b1e9-62fd-43b1-8265-a923d4ab8eb7", - "metadata": {}, - "source": [ - "## Get country codes from GADM" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "994246ad-0616-43c7-a4b8-675c2a2e1d65", - "metadata": {}, - "outputs": [], - "source": [ - "ccode_mapping = (\n", - " read_shapefile(sset.PATH_GADM, layer=0)\n", - " .set_index(\"COUNTRY\")\n", - " .GID_0.rename(\"ccode\")\n", - " .rename_axis(\"name\")\n", - ")\n", - "# drop numerical china/india/pakistan GID_0's\n", - "ccode_mapping = ccode_mapping[ccode_mapping.str[1] != \"0\"]\n", - "\n", - "# add on manaully added segments (which account for uninhabited areas not in GADM)\n", - "ccode_mapping = pd.concat(\n", - " (ccode_mapping, 
pd.read_parquet(sset.PATH_SEG_PTS_MANUAL).ccode)\n", - ")\n", - "\n", - "# add some manual mappers\n", - "# Netherlands Antilles in CIA WFB corresponds to these three (not ABW)\n", - "manual = sset.CCODE_MANUAL.copy()\n", - "manual[\"Netherlands Antilles\"] = \"BES+CUW+SXM\"\n", - "ccode_mapping = pd.concat([ccode_mapping, manual]).sort_index()\n", - "\n", - "# Handle no-accent names\n", - "alt_index = (\n", - " ccode_mapping.index.str.normalize(\"NFKD\")\n", - " .str.encode(\"ascii\", errors=\"ignore\")\n", - " .astype(str)\n", - ")\n", - "alt = pd.Series(ccode_mapping.values, index=alt_index, name=\"ccode\")\n", - "ccode_mapping = (\n", - " pd.concat((ccode_mapping, alt))\n", - " .reset_index()\n", - " .drop_duplicates()\n", - " .set_index(\"name\")\n", - " .ccode.sort_index()\n", - ")\n", - "\n", - "# getting list of valid ccodes including some previously uncaptured mixtures (i.e.\n", - "# France + overseas depts)\n", - "valid_ccodes = np.setdiff1d(\n", - " np.unique(\n", - " np.concatenate(\n", - " (\n", - " ccode_mapping.unique(),\n", - " [k for v in sset.PPP_CCODE_IF_MSNG.values() for k in v],\n", - " )\n", - " )\n", - " ),\n", - " sset.EXCLUDED_ISOS,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "f0525373-d81a-4710-b729-de857effa1ef", - "metadata": {}, - "outputs": [], - "source": [ - "save(pd.DataFrame({\"ccode\": valid_ccodes}), sset.PATH_ALL_VALID_HIST_CCODES)\n", - "save(ccode_mapping.to_frame(), sset.PATH_HIST_CCODE_MAPPING)" - ] - }, { "cell_type": "markdown", "id": "647b195b", @@ -1471,7 +1471,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.2" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From bfa67b9e9a3851ad2ce4754d4d3a53640c3a41f6 Mon Sep 17 00:00:00 2001 From: Sanika Vaze Date: Wed, 30 Jul 2025 10:33:35 -0700 Subject: [PATCH 2/3] Updated download link for Credit Suisse GWBD in 03 Data Acquisition Notebook and Removed 
unnecessary cell (mangroves) in 01 Data Cleaning Notebook --- .../03-download-sliiders-input-data.ipynb | 9 ++------- .../01-clean-country-socioecon-data.ipynb | 18 ------------------ 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/notebooks/data-acquisition/03-download-sliiders-input-data.ipynb b/notebooks/data-acquisition/03-download-sliiders-input-data.ipynb index 4775659..ef3e73f 100644 --- a/notebooks/data-acquisition/03-download-sliiders-input-data.ipynb +++ b/notebooks/data-acquisition/03-download-sliiders-input-data.ipynb @@ -527,12 +527,7 @@ "with sset.PATH_GWDB_RAW.open(\"wb\") as f:\n", " f.write(\n", " requests.get(\n", - " \"https://www.ubs.com/global/en/family-office-uhnw/reports/\"\n", - " \"global-wealth-report-2023/_jcr_content/mainpar/toplevelgrid_5684475/col2/\"\n", - " \"linklistnewlook/link_copy.0357374027.file/\"\n", - " \"PS9jb250ZW50L2RhbS9hc3NldHMvd20vZ2xvYmFsL2ltZy9nbG9iYWwtZmFtaWx5LW9mZmljZS\"\n", - " \"9kb2NzL2RhdGFib29rLWdsb2JhbC13ZWFsdGgtcmVwb3J0LTIwMjMtZW4ucGRm/\"\n", - " \"databook-global-wealth-report-2023-en.pdf\"\n", + " \"https://www.ubs.com/global/en/family-office-uhnw/reports/global-wealth-report-2023/_jcr_content/mainpar/toplevelgrid_5684475_1708633751/col1/innergrid/xcol1/actionbutton_copy_co.1784379955.file/PS9jb250ZW50L2RhbS9hc3NldHMvd20vZ2xvYmFsL2ltZy9nbG9iYWwtZmFtaWx5LW9mZmljZS9kb2NzL2d3ci0yMDIzLWVuLTIucGRm/gwr-2023-en-2.pdf\"\n", " ).content\n", " )" ] @@ -1740,7 +1735,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.2" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb b/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb index 4fed20b..b30a04a 100644 --- 
a/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb +++ b/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb @@ -95,24 +95,6 @@ ")" ] }, - { - "cell_type": "markdown", - "id": "dbff1f30-3dd4-4666-9df2-5358f456f5eb", - "metadata": {}, - "source": [ - "## Mangroves" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bc40d4b-ab6f-40de-b37f-cef0b2cffe6c", - "metadata": {}, - "outputs": [], - "source": [ - "this = gpd.GeoDataFrame(geometry=[read_shapefile(sset.PATH_GLOBAL_MANGROVES_RAW).make_valid().unary_union], crs=4326).to_parquet(sset.PATH_GLOBAL_MANGROVES_INT)" - ] - }, { "cell_type": "markdown", "id": "3207b1e9-62fd-43b1-8265-a923d4ab8eb7", From 5c753000e5ab45159cbca169850da989a1bea687 Mon Sep 17 00:00:00 2001 From: Sanika Vaze Date: Wed, 30 Jul 2025 15:45:31 -0700 Subject: [PATCH 3/3] remove unused cell from target notebook --- .../01-clean-country-socioecon-data.ipynb | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb b/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb index b30a04a..79e40aa 100644 --- a/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb +++ b/notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb @@ -95,82 +95,6 @@ ")" ] }, - { - "cell_type": "markdown", - "id": "3207b1e9-62fd-43b1-8265-a923d4ab8eb7", - "metadata": {}, - "source": [ - "## Get country codes from GADM" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "994246ad-0616-43c7-a4b8-675c2a2e1d65", - "metadata": {}, - "outputs": [], - "source": [ - "ccode_mapping = (\n", - " read_shapefile(sset.PATH_GADM, layer=0)\n", - " .set_index(\"COUNTRY\")\n", - " .GID_0.rename(\"ccode\")\n", - " .rename_axis(\"name\")\n", - ")\n", - "# drop numerical china/india/pakistan GID_0's\n", - "ccode_mapping = ccode_mapping[ccode_mapping.str[1] != \"0\"]\n", - "\n", - "# add on manaully added segments (which account 
for uninhabited areas not in GADM)\n", - "ccode_mapping = pd.concat(\n", - " (ccode_mapping, pd.read_parquet(sset.PATH_SEG_PTS_MANUAL).ccode)\n", - ")\n", - "\n", - "# add some manual mappers\n", - "# Netherlands Antilles in CIA WFB corresponds to these three (not ABW)\n", - "manual = sset.CCODE_MANUAL.copy()\n", - "manual[\"Netherlands Antilles\"] = \"BES+CUW+SXM\"\n", - "ccode_mapping = pd.concat([ccode_mapping, manual]).sort_index()\n", - "\n", - "# Handle no-accent names\n", - "alt_index = (\n", - " ccode_mapping.index.str.normalize(\"NFKD\")\n", - " .str.encode(\"ascii\", errors=\"ignore\")\n", - " .astype(str)\n", - ")\n", - "alt = pd.Series(ccode_mapping.values, index=alt_index, name=\"ccode\")\n", - "ccode_mapping = (\n", - " pd.concat((ccode_mapping, alt))\n", - " .reset_index()\n", - " .drop_duplicates()\n", - " .set_index(\"name\")\n", - " .ccode.sort_index()\n", - ")\n", - "\n", - "# getting list of valid ccodes including some previously uncaptured mixtures (i.e.\n", - "# France + overseas depts)\n", - "valid_ccodes = np.setdiff1d(\n", - " np.unique(\n", - " np.concatenate(\n", - " (\n", - " ccode_mapping.unique(),\n", - " [k for v in sset.PPP_CCODE_IF_MSNG.values() for k in v],\n", - " )\n", - " )\n", - " ),\n", - " sset.EXCLUDED_ISOS,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "f0525373-d81a-4710-b729-de857effa1ef", - "metadata": {}, - "outputs": [], - "source": [ - "save(pd.DataFrame({\"ccode\": valid_ccodes}), sset.PATH_ALL_VALID_HIST_CCODES)\n", - "save(ccode_mapping.to_frame(), sset.PATH_HIST_CCODE_MAPPING)" - ] - }, { "cell_type": "markdown", "id": "530700ee-d556-45c8-a254-1333a3a459c7", @@ -300,6 +224,82 @@ "save_geoparquet(adm1, sset.PATH_GADM_ADM1_INT)" ] }, + { + "cell_type": "markdown", + "id": "3207b1e9-62fd-43b1-8265-a923d4ab8eb7", + "metadata": {}, + "source": [ + "## Get country codes from GADM" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "994246ad-0616-43c7-a4b8-675c2a2e1d65", 
+ "metadata": {}, + "outputs": [], + "source": [ + "ccode_mapping = (\n", + " read_shapefile(sset.PATH_GADM, layer=0)\n", + " .set_index(\"COUNTRY\")\n", + " .GID_0.rename(\"ccode\")\n", + " .rename_axis(\"name\")\n", + ")\n", + "# drop numerical china/india/pakistan GID_0's\n", + "ccode_mapping = ccode_mapping[ccode_mapping.str[1] != \"0\"]\n", + "\n", + "# add on manaully added segments (which account for uninhabited areas not in GADM)\n", + "ccode_mapping = pd.concat(\n", + " (ccode_mapping, pd.read_parquet(sset.PATH_SEG_PTS_MANUAL).ccode)\n", + ")\n", + "\n", + "# add some manual mappers\n", + "# Netherlands Antilles in CIA WFB corresponds to these three (not ABW)\n", + "manual = sset.CCODE_MANUAL.copy()\n", + "manual[\"Netherlands Antilles\"] = \"BES+CUW+SXM\"\n", + "ccode_mapping = pd.concat([ccode_mapping, manual]).sort_index()\n", + "\n", + "# Handle no-accent names\n", + "alt_index = (\n", + " ccode_mapping.index.str.normalize(\"NFKD\")\n", + " .str.encode(\"ascii\", errors=\"ignore\")\n", + " .astype(str)\n", + ")\n", + "alt = pd.Series(ccode_mapping.values, index=alt_index, name=\"ccode\")\n", + "ccode_mapping = (\n", + " pd.concat((ccode_mapping, alt))\n", + " .reset_index()\n", + " .drop_duplicates()\n", + " .set_index(\"name\")\n", + " .ccode.sort_index()\n", + ")\n", + "\n", + "# getting list of valid ccodes including some previously uncaptured mixtures (i.e.\n", + "# France + overseas depts)\n", + "valid_ccodes = np.setdiff1d(\n", + " np.unique(\n", + " np.concatenate(\n", + " (\n", + " ccode_mapping.unique(),\n", + " [k for v in sset.PPP_CCODE_IF_MSNG.values() for k in v],\n", + " )\n", + " )\n", + " ),\n", + " sset.EXCLUDED_ISOS,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f0525373-d81a-4710-b729-de857effa1ef", + "metadata": {}, + "outputs": [], + "source": [ + "save(pd.DataFrame({\"ccode\": valid_ccodes}), sset.PATH_ALL_VALID_HIST_CCODES)\n", + "save(ccode_mapping.to_frame(), sset.PATH_HIST_CCODE_MAPPING)" + ] 
+ }, { "cell_type": "markdown", "id": "647b195b",