diff --git a/README.rst b/README.rst index bbedd1b..f1108ab 100644 --- a/README.rst +++ b/README.rst @@ -83,6 +83,14 @@ There are two options to do this: Accessing the SnowEx data ----------------- + +There are two ways to access SnowEx data through this library: + +1. **Direct Database Access** (requires database credentials) +2. **Lambda Client** (no credentials required; serverless access via a public Lambda Function URL) + +Direct Database Access +======================= A programmatic API has been created for fast and standard access to Point and Layer data. There are two examples_ covering the features and usage of the api. See the specific api_ documentation for @@ -99,6 +107,49 @@ detailed description. ) print(df.head()) +Lambda Client (Serverless Access) +================================== +For users who prefer serverless access or don't want to manage database +connections, we provide an AWS Lambda-based client with a public Function URL. + +**No credentials required!** The Lambda function handles all database +credentials internally via AWS Secrets Manager. + +**Requirements:** + +* No AWS credentials needed - public HTTP endpoint +* No database credentials needed - handled by Lambda +* requests library installed (included with snowexsql) + +**Usage:** + +.. code-block:: python + + from snowexsql.lambda_client import SnowExLambdaClient + from datetime import date + + # Initialize client - no credentials needed! + client = SnowExLambdaClient() + + # Get measurement classes + classes = client.get_measurement_classes() + PointMeasurements = classes['PointMeasurements'] + + # Query data (same API as direct access) + df = PointMeasurements.from_filter( + date=date(2020, 5, 28), instrument='camera' + ) + +See the `lambda_example notebook `_ +for complete examples. 
+ +**How It Works:** + +- Public Lambda Function URL allows anyone to query the database +- Database credentials stored securely in AWS Secrets Manager (never exposed) +- Database only accepts connections from Lambda (not public internet) +- All queries go through Lambda for security and monitoring + Getting help ------------ diff --git a/deployment/scripts/test_lambda.sh b/deployment/scripts/test_lambda.sh index 4884de3..17bead0 100755 --- a/deployment/scripts/test_lambda.sh +++ b/deployment/scripts/test_lambda.sh @@ -2,11 +2,13 @@ # Test script for the deployed Lambda function # This script tests the basic functionality of the deployed Lambda +# Supports both direct Lambda invocation (requires AWS creds) and Function URL (public) set -e AWS_REGION="us-west-2" LAMBDA_FUNCTION_NAME="lambda-snowex-sql" +FUNCTION_URL="https://izwsawyfkxss5vawq5v64mruqy0ahxek.lambda-url.us-west-2.on.aws" # Colors for output GREEN='\033[0;32m' @@ -16,32 +18,77 @@ NC='\033[0m' # No Color echo -e "${YELLOW}Testing Lambda function: ${LAMBDA_FUNCTION_NAME}${NC}" +# Determine test method +if [ "$1" = "--function-url" ] || [ "$1" = "-u" ]; then + TEST_METHOD="function-url" + echo -e "${YELLOW}Using Function URL (public access - no AWS credentials required)${NC}" +else + TEST_METHOD="boto3" + echo -e "${YELLOW}Using boto3 invocation (requires AWS credentials)${NC}" + echo -e "${YELLOW}Tip: Use --function-url flag to test public Function URL instead${NC}" +fi + # Test 1: Basic connectivity test echo -e "${YELLOW}Test 1: Basic database connectivity...${NC}" -aws lambda invoke \ - --region ${AWS_REGION} \ - --function-name ${LAMBDA_FUNCTION_NAME} \ - --cli-binary-format raw-in-base64-out \ - --payload '{"action":"test_connection"}' \ - test_response.json -if [ $? 
-eq 0 ]; then - echo -e "${GREEN}✓ Lambda invocation successful${NC}" - echo -e "${YELLOW}Response:${NC}" - if command -v jq >/dev/null 2>&1; then - # Show full response and decoded body +if [ "$TEST_METHOD" = "function-url" ]; then + # Test via Function URL + echo -e "${YELLOW}Testing via HTTP POST to: ${FUNCTION_URL}${NC}" + HTTP_STATUS=$(curl -s -o test_response.json -w "%{http_code}" \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"action":"test_connection"}' \ + "${FUNCTION_URL}") + + if [ "$HTTP_STATUS" = "200" ]; then + echo -e "${GREEN}✓ Function URL request successful (HTTP ${HTTP_STATUS})${NC}" + else + echo -e "${RED}✗ Function URL request failed (HTTP ${HTTP_STATUS})${NC}" + cat test_response.json + exit 1 + fi +else + # Test via AWS CLI (boto3) + aws lambda invoke \ + --region ${AWS_REGION} \ + --function-name ${LAMBDA_FUNCTION_NAME} \ + --cli-binary-format raw-in-base64-out \ + --payload '{"action":"test_connection"}' \ + test_response.json + + if [ $? -eq 0 ]; then + echo -e "${GREEN}✓ Lambda invocation successful${NC}" + else + echo -e "${RED}✗ Lambda invocation failed${NC}" + exit 1 + fi +fi + +# Display response +echo -e "${YELLOW}Response:${NC}" + +# Display response +echo -e "${YELLOW}Response:${NC}" +if command -v jq >/dev/null 2>&1; then + # For boto3 invocation, parse nested body; for Function URL, show direct response + if [ "$TEST_METHOD" = "boto3" ]; then echo -e "${YELLOW}Full invoke response:${NC}" jq . test_response.json echo -e "${YELLOW}Decoded body:${NC}" jq -r '.body' test_response.json | jq . 2>/dev/null || jq -r '.body' test_response.json else - echo -e "${YELLOW}jq not found; using Python to pretty-print JSON${NC}" - if command -v python3 >/dev/null 2>&1; then + echo -e "${YELLOW}Function URL response:${NC}" + jq . 
test_response.json + fi +else + echo -e "${YELLOW}jq not found; using Python to pretty-print JSON${NC}" + if command -v python3 >/dev/null 2>&1; then + if [ "$TEST_METHOD" = "boto3" ]; then echo -e "${YELLOW}Full invoke response:${NC}" python3 -m json.tool < test_response.json || cat test_response.json echo -e "${YELLOW}Decoded body:${NC}" - python3 - "$AWS_REGION" << 'PY' -import json,sys + python3 - << 'PY' +import json try: data=json.load(open('test_response.json','r')) body=data.get('body') @@ -52,52 +99,29 @@ try: print(body) else: print(json.dumps(body, indent=2)) -except Exception as e: +except Exception: print(open('test_response.json','r').read()) PY else - cat test_response.json + python3 -m json.tool < test_response.json || cat test_response.json fi + else + cat test_response.json fi -else - echo -e "${RED}✗ Lambda invocation failed${NC}" - exit 1 fi -############################################# -# Test 2: Check logs (best-effort) -############################################# -echo -e "${YELLOW}Test 2: Checking recent logs...${NC}" -LOG_GROUP=$(aws logs describe-log-groups \ - --region ${AWS_REGION} \ - --log-group-name-prefix "/aws/lambda/${LAMBDA_FUNCTION_NAME}" \ - --query 'logGroups[0].logGroupName' \ - --output text 2>/dev/null || echo "") +echo "" +echo -e "${GREEN}========================================${NC}" +echo -e "${GREEN}✓ All tests passed!${NC}" +echo -e "${GREEN}========================================${NC}" +echo "" +echo -e "${YELLOW}Usage tips:${NC}" +echo -e " • Test with Function URL (public): ./test_lambda.sh --function-url" +echo -e " • Test with boto3 (requires creds): ./test_lambda.sh" +echo "" -if [ -z "$LOG_GROUP" ] || [ "$LOG_GROUP" = "None" ]; then - echo -e "${YELLOW}No log group found yet for ${LAMBDA_FUNCTION_NAME}. 
Skipping log fetch.${NC}" -else - LOG_STREAM=$(aws logs describe-log-streams \ - --region ${AWS_REGION} \ - --log-group-name "$LOG_GROUP" \ - --order-by LastEventTime \ - --descending \ - --max-items 1 \ - --query 'logStreams[0].logStreamName' \ - --output text 2>/dev/null || echo "") - - if [ -z "$LOG_STREAM" ] || [ "$LOG_STREAM" = "None" ]; then - echo -e "${YELLOW}No recent log stream found. It can take a few seconds for logs to appear.${NC}" - else - aws logs get-log-events \ - --region ${AWS_REGION} \ - --log-group-name "$LOG_GROUP" \ - --log-stream-name "$LOG_STREAM" \ - --limit 10 \ - --query 'events[*].message' \ - --output text || true - fi -fi +# Cleanup +rm -f test_response.json # Cleanup rm -f test_response.json diff --git a/docs/gallery/lambda_example.ipynb b/docs/gallery/lambda_example.ipynb index 2133fab..2d021b2 100644 --- a/docs/gallery/lambda_example.ipynb +++ b/docs/gallery/lambda_example.ipynb @@ -1,5 +1,28 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "9b0c625a", + "metadata": {}, + "source": [ + "## 🚀 No Credentials Required!\n", + "\n", + "This notebook uses the **SnowEx Lambda Client** with a public Function URL.\n", + "\n", + "**You don't need:**\n", + "- ❌ AWS credentials\n", + "- ❌ Database credentials \n", + "- ❌ Any setup or configuration\n", + "\n", + "**How it works:**\n", + "- The Lambda Function URL is public and accessible to anyone\n", + "- Database credentials are handled securely by AWS Secrets Manager\n", + "- Database only accepts connections from Lambda (not public internet)\n", + "- Just run the cells and start exploring SnowEx data!\n", + "\n", + "---" + ] + }, { "cell_type": "markdown", "id": "d4d8e20d", diff --git a/pyproject.toml b/pyproject.toml index be65765..eb1a09e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,8 @@ dependencies = [ "rasterio <2.0", "SQLAlchemy <3.0", "boto3 <2.0", + "requests >=2.31.0", + "urllib3 >=2.0", ] diff --git a/snowexsql/lambda_client.py b/snowexsql/lambda_client.py 
index 9f33a16..cd9d47a 100644 --- a/snowexsql/lambda_client.py +++ b/snowexsql/lambda_client.py @@ -3,23 +3,31 @@ Lightweight client for accessing SnowEx database via AWS Lambda function. Provides serverless access to snow data without requiring -heavy geospatial dependencies. +AWS credentials or heavy geospatial dependencies. + +Uses Lambda Function URL for public HTTP access. """ -import boto3 import json +import os import pandas as pd -from typing import Dict, Any +import requests +from typing import Dict, Any, Optional from datetime import datetime, date +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry class SnowExLambdaClient: """ - Client for accessing SnowEx data via AWS Lambda + Client for accessing SnowEx data via AWS Lambda Function URL This client provides serverless access to the SnowEx database through - a deployed Lambda function, eliminating the need for direct database - connections or heavy geospatial dependencies. + a public Lambda Function URL, eliminating the need for AWS credentials, + database connections, or heavy geospatial dependencies. + + The Lambda function handles all database credentials securely via + AWS Secrets Manager. 
The client mirrors the api.py class structure, providing access to: - PointMeasurements: Point data measurements @@ -39,20 +47,58 @@ class SnowExLambdaClient: >>> instruments = client.point_measurements.all_instruments """ - def __init__( - self, - region: str = 'us-west-2', - function_name: str = 'lambda-snowex-sql' - ): + # Default production Lambda Function URL + DEFAULT_FUNCTION_URL = 'https://izwsawyfkxss5vawq5v64mruqy0ahxek.lambda-url.us-west-2.on.aws' + + def __init__(self, function_url: Optional[str] = None): """ - Initialize the Lambda client + Initialize the Lambda client with Function URL Args: - region: AWS region where the Lambda function is deployed - function_name: Name of the deployed Lambda function + function_url: Lambda Function URL (https://....lambda-url.us-west-2.on.aws/) + If None, uses SNOWEX_LAMBDA_URL environment variable + or default production URL. + + No AWS credentials required - uses public HTTP endpoint. """ - self.lambda_client = boto3.client('lambda', region_name=region) - self.function_name = function_name + # Get Function URL from parameter, environment, or default + self.function_url = ( + function_url or + os.environ.get('SNOWEX_LAMBDA_URL') or + self.DEFAULT_FUNCTION_URL + ) + + # Validate URL + if not self.function_url or self.function_url == 'PASTE_YOUR_FUNCTION_URL_HERE': + raise ValueError( + "\n\n" + + "=" * 70 + "\n" + + "Lambda Function URL Not Configured\n" + + "=" * 70 + "\n\n" + + "Please provide the Lambda Function URL in one of these ways:\n\n" + + "1. Pass directly to constructor:\n" + + " client = SnowExLambdaClient(function_url='https://...')\n\n" + + "2. Set environment variable:\n" + + " export SNOWEX_LAMBDA_URL='https://...'\n\n" + + "3. 
Update DEFAULT_FUNCTION_URL in lambda_client.py\n\n" + + "Contact the SnowEx team if you need the Function URL.\n" + + "=" * 70 + ) + + # Remove trailing slash if present + self.function_url = self.function_url.rstrip('/') + + # Setup HTTP session with retries for reliability + self.session = requests.Session() + retry_strategy = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504], + allowed_methods=["POST"] + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("https://", adapter) + self.session.mount("http://", adapter) # Dynamically create class-based accessors from available # measurement classes @@ -68,24 +114,8 @@ def query(self, sql_query: str) -> pd.DataFrame: Returns: pd.DataFrame: Query results as a DataFrame """ - payload = { - 'action': 'query', - 'sql': sql_query - } - - response = self.lambda_client.invoke( - FunctionName=self.function_name, - InvocationType='RequestResponse', - Payload=json.dumps(payload) - ) - - result = json.loads(response['Payload'].read()) - - if result.get('statusCode') != 200: - raise Exception(f"Lambda query failed: {result.get('body')}") - - body = json.loads(result['body']) - return pd.DataFrame(body.get('data', [])) + result = self._invoke_lambda('query', sql=sql_query) + return pd.DataFrame(result.get('data', [])) def _create_measurement_clients(self): """ @@ -214,7 +244,7 @@ def _serialize_payload(self, obj): def _invoke_lambda(self, action: str, **kwargs) -> dict: """ - Internal method to invoke Lambda function + Internal method to invoke Lambda function via HTTP POST Args: action: The action to perform @@ -234,30 +264,59 @@ def _invoke_lambda(self, action: str, **kwargs) -> dict: payload = self._serialize_payload(payload) try: - response = self.lambda_client.invoke( - FunctionName=self.function_name, - InvocationType='RequestResponse', - Payload=json.dumps(payload) + response = self.session.post( + self.function_url, + json=payload, + headers={'Content-Type': 
'application/json'}, + timeout=30 # 30 second timeout ) - result = json.loads(response['Payload'].read().decode('utf-8')) + # Check HTTP status + if response.status_code != 200: + error_text = response.text[:500] if response.text else 'No response body' + raise Exception( + f"Lambda returned HTTP {response.status_code}: {error_text}" + ) - # Check if result has the expected structure - if 'body' not in result: - raise Exception(f"Unexpected Lambda response format: {result}") + # Parse JSON response + result = response.json() - body = json.loads(result['body']) + # Check for Lambda errors + if 'errorMessage' in result: + raise Exception(f"Lambda error: {result['errorMessage']}") - # Check for errors in the response - if 'error' in body: - raise Exception(f"Lambda returned error: {body['error']}") + # Check for application-level errors + if 'error' in result: + raise Exception(f"Query error: {result['error']}") + + if not result.get('success', True): + error_msg = result.get('error', 'Unknown error') + raise Exception(f"Request failed: {error_msg}") - return body + return result + except requests.exceptions.Timeout: + raise Exception( + "Request timed out after 30 seconds. The query may be too complex " + "or the database is slow. Try adding a 'limit' parameter to reduce result size." + ) + except requests.exceptions.ConnectionError as e: + raise Exception( + f"Could not connect to Lambda function at:\n{self.function_url}\n\n" + f"Please verify:\n" + f"1. The Function URL is correct\n" + f"2. You have internet connectivity\n" + f"3. 
The Lambda function is deployed and active\n\n" + f"Connection error: {str(e)}" + ) + except requests.exceptions.RequestException as e: + raise Exception(f"HTTP request failed: {str(e)}") except json.JSONDecodeError as e: - raise Exception(f"Failed to parse Lambda response: {str(e)}") - except Exception as e: - raise Exception(f"Lambda invocation failed: {str(e)}") + response_preview = response.text[:200] if hasattr(response, 'text') else 'N/A' + raise Exception( + f"Failed to parse Lambda response as JSON: {str(e)}\n" + f"Response preview: {response_preview}" + ) def test_connection(self) -> Dict[str, Any]: """ @@ -625,18 +684,14 @@ def __repr__(self): # Convenience function for quick client creation -def create_client( - region: str = 'us-west-2', - function_name: str = 'lambda-snowex-sql' -) -> SnowExLambdaClient: +def create_client(function_url: Optional[str] = None) -> SnowExLambdaClient: """ Create a SnowExLambdaClient instance Args: - region: AWS region where the Lambda function is deployed - function_name: Name of the deployed Lambda function + function_url: Lambda Function URL (optional) Returns: SnowExLambdaClient instance """ - return SnowExLambdaClient(region=region, function_name=function_name) \ No newline at end of file + return SnowExLambdaClient(function_url=function_url) \ No newline at end of file diff --git a/snowexsql/lambda_handler.py b/snowexsql/lambda_handler.py index 279f516..0be8737 100644 --- a/snowexsql/lambda_handler.py +++ b/snowexsql/lambda_handler.py @@ -463,7 +463,22 @@ def lambda_handler(event: Dict[str, Any], context: Any): This is the function the Lambda runtime will call when we set the handler to `snowexsql.lambda_handler.lambda_handler` in the container CMD. + + Handles both direct Lambda invocation and Function URL HTTP requests. 
""" + # Parse event based on invocation type + # Function URLs wrap the payload in an HTTP structure + if 'body' in event and isinstance(event.get('body'), str): + # Function URL invocation - parse JSON body + try: + parsed_event = json.loads(event['body']) + except json.JSONDecodeError as e: + error_body = json.dumps({'error': f'Invalid JSON in request body: {str(e)}'}) + return {'statusCode': 400, 'body': error_body} + else: + # Direct Lambda invocation (boto3) - use event as-is + parsed_event = event + secret_name = os.environ.get('DB_SECRET_NAME') region = os.environ.get('DB_AWS_REGION') @@ -479,7 +494,7 @@ def lambda_handler(event: Dict[str, Any], context: Any): return {'statusCode': 500, 'body': error_body} try: - result = handle_event_with_secret(event, secret) + result = handle_event_with_secret(parsed_event, secret) return {'statusCode': 200, 'body': json.dumps(result)} except Exception as e: LOG.exception('Handler failed')