Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@

## Latest Announcements

:mega: **12/04/2025:** `dataretrieval` now features the new `waterdata` module,
:mega: **01/16/2026:** `dataretrieval` now features the `waterdata` module,
which provides access to USGS's modernized [Water Data
APIs](https://api.waterdata.usgs.gov/). The Water Data API endpoints include
daily values, **instantaneous values**, field measurements, time series metadata,
and discrete water quality data from the Samples database. This new module will
eventually replace the `nwis` module, which provides access to the legacy [NWIS
Water Services](https://waterservices.usgs.gov/).
and discrete water quality data from the [Samples database](https://waterdata.usgs.gov/download-samples/#dataProfile=site). This new module replaces the `nwis` module, which provides access to the legacy [NWIS
Water Services](https://waterservices.usgs.gov/). Take a look at the new [`waterdata` module demo notebook](demos/WaterData_demo.ipynb), which walks through an extended example using a majority of the available `waterdata` functions.

Check out the [NEWS](NEWS.md) file for all updates and announcements.

Expand Down
2 changes: 0 additions & 2 deletions dataretrieval/waterdata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

# Public API exports
from .api import (
_check_profiles,
get_codes,
get_continuous,
get_daily,
Expand Down Expand Up @@ -41,7 +40,6 @@
"get_reference_table",
"get_samples",
"get_time_series_metadata",
"_check_profiles",
"CODE_SERVICES",
"SERVICES",
"PROFILES",
Expand Down
57 changes: 26 additions & 31 deletions dataretrieval/waterdata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@
from dataretrieval.waterdata.types import (
CODE_SERVICES,
METADATA_COLLECTIONS,
PROFILE_LOOKUP,
PROFILES,
SERVICES,
)
from dataretrieval.waterdata.utils import (
SAMPLES_URL,
get_ogc_data,
_construct_api_requests,
_walk_pages
_walk_pages,
_check_profiles
)

# Set up logger for this module
Expand Down Expand Up @@ -691,9 +691,13 @@ def get_time_series_metadata(
parameter_name: Optional[Union[str, List[str]]] = None,
properties: Optional[Union[str, List[str]]] = None,
statistic_id: Optional[Union[str, List[str]]] = None,
hydrologic_unit_code: Optional[Union[str, List[str]]] = None,
state_name: Optional[Union[str, List[str]]] = None,
last_modified: Optional[Union[str, List[str]]] = None,
begin: Optional[Union[str, List[str]]] = None,
end: Optional[Union[str, List[str]]] = None,
begin_utc: Optional[Union[str, List[str]]] = None,
end_utc: Optional[Union[str, List[str]]] = None,
unit_of_measure: Optional[Union[str, List[str]]] = None,
computation_period_identifier: Optional[Union[str, List[str]]] = None,
computation_identifier: Optional[Union[str, List[str]]] = None,
Expand Down Expand Up @@ -742,6 +746,17 @@ def get_time_series_metadata(
Example codes include 00001 (max), 00002 (min), and 00003 (mean).
A complete list of codes and their descriptions can be found at
https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html.
hydrologic_unit_code : string or list of strings, optional
The United States is divided and sub-divided into successively smaller
hydrologic units which are classified into four levels: regions,
sub-regions, accounting units, and cataloging units. The hydrologic
units are arranged within each other, from the smallest (cataloging units)
to the largest (regions). Each hydrologic unit is identified by a unique
hydrologic unit code (HUC) consisting of two to eight digits based on the
four levels of classification in the hydrologic unit system.
state_name : string or list of strings, optional
The name of the state or state equivalent in which the monitoring location
is located.
last_modified : string, optional
The last time a record was refreshed in our database. This may happen
due to regular operational processes and does not necessarily indicate
Expand All @@ -760,6 +775,14 @@ def get_time_series_metadata(
for the last 36 hours

begin : string or list of strings, optional
This field contains the same information as "begin_utc", but in the
local time of the monitoring location. It is retained for backwards
compatibility, but will be removed in V1 of these APIs.
end : string or list of strings, optional
This field contains the same information as "end_utc", but in the
local time of the monitoring location. It is retained for backwards
compatibility, but will be removed in V1 of these APIs.
begin_utc : string or list of strings, optional
The datetime of the earliest observation in the time series. Together
with end_utc, this field represents the period of record of a time series.
Note that some time series may have large gaps in their collection
Expand All @@ -776,7 +799,7 @@ def get_time_series_metadata(
* Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z"
* Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours

end : string or list of strings, optional
end_utc : string or list of strings, optional
The datetime of the most recent observation in the time series. Data returned by
this endpoint updates at most once per day, and potentially less frequently than
that, and as such there may be more recent observations within a time series
Expand Down Expand Up @@ -1703,31 +1726,3 @@ def get_samples(

return df, BaseMetadata(response)


def _check_profiles(
    service: SERVICES,
    profile: PROFILES,
) -> None:
    """Validate a service name and a profile name for that service.

    The service is checked first against the arguments of ``SERVICES``;
    only then is the profile checked against the entries that
    ``PROFILE_LOOKUP`` holds for that service.

    Parameters
    ----------
    service : string
        One of the service names from the "services" list.
    profile : string
        One of the profile names from "results_profiles",
        "locations_profiles", "activities_profiles",
        "projects_profiles" or "organizations_profiles".

    Raises
    ------
    ValueError
        If ``service`` is not one of the ``SERVICES`` arguments, or if
        ``profile`` is not listed in ``PROFILE_LOOKUP[service]``.
    """
    valid_services = get_args(SERVICES)
    service_is_known = service in valid_services
    if not service_is_known:
        raise ValueError(
            f"Invalid service: '{service}'. Valid options are: {valid_services}."
        )

    # The lookup is safe here: the service was validated just above.
    valid_profiles = PROFILE_LOOKUP[service]
    if profile in valid_profiles:
        return

    raise ValueError(
        f"Invalid profile: '{profile}' for service '{service}'. "
        f"Valid options are: {valid_profiles}."
    )
37 changes: 36 additions & 1 deletion dataretrieval/waterdata/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union, get_args

import pandas as pd
import requests
Expand All @@ -13,6 +13,12 @@
from dataretrieval.utils import BaseMetadata
from dataretrieval import __version__

from dataretrieval.waterdata.types import (
PROFILE_LOOKUP,
PROFILES,
SERVICES,
)

try:
import geopandas as gpd

Expand Down Expand Up @@ -498,6 +504,7 @@ def _get_resp_data(resp: requests.Response, geopd: bool) -> pd.DataFrame:
)
df.columns = [col.replace("properties_", "") for col in df.columns]
df.rename(columns={"geometry_coordinates": "geometry"}, inplace=True)
df = df.loc[:, ~df.columns.duplicated()]
return df

# Organize json into geodataframe and make sure id column comes along.
Expand Down Expand Up @@ -824,3 +831,31 @@ def get_ogc_data(
return return_list, metadata


def _check_profiles(
    service: SERVICES,
    profile: PROFILES,
) -> None:
    """Validate a service name and a profile name for that service.

    The service is checked first against the arguments of ``SERVICES``;
    only then is the profile checked against the entries that
    ``PROFILE_LOOKUP`` holds for that service.

    Parameters
    ----------
    service : string
        One of the service names from the "services" list.
    profile : string
        One of the profile names from "results_profiles",
        "locations_profiles", "activities_profiles",
        "projects_profiles" or "organizations_profiles".

    Raises
    ------
    ValueError
        If ``service`` is not one of the ``SERVICES`` arguments, or if
        ``profile`` is not listed in ``PROFILE_LOOKUP[service]``.
    """
    valid_services = get_args(SERVICES)
    service_is_known = service in valid_services
    if not service_is_known:
        raise ValueError(
            f"Invalid service: '{service}'. Valid options are: {valid_services}."
        )

    # The lookup is safe here: the service was validated just above.
    valid_profiles = PROFILE_LOOKUP[service]
    if profile in valid_profiles:
        return

    raise ValueError(
        f"Invalid profile: '{profile}' for service '{service}'. "
        f"Valid options are: {valid_profiles}."
    )

Loading