diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 10a6d77..5a95cca 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,11 +31,19 @@ Thank you for your interest in contributing to the DocumentDB Functional Tests! ## Writing Tests +For comprehensive testing guidance, see our detailed documentation: + +- **[Quick Reference](docs/testing/QUICK_REFERENCE.md)** - Fast lookup for all testing rules +- **[Test Format Guide](docs/testing/TEST_FORMAT.md)** - Test structure, naming, assertions, and tags +- **[Test Coverage Guide](docs/testing/TEST_COVERAGE.md)** - Coverage strategies and edge case testing +- **[Folder Structure Guide](docs/testing/FOLDER_STRUCTURE.md)** - Where to put tests with decision tree + ### Test File Organization - Place tests in the appropriate directory based on the operation being tested - Use descriptive file names: `test_.py` - Group related tests in the same file +- See [Folder Structure Guide](docs/testing/FOLDER_STRUCTURE.md) for detailed organization rules ### Test Structure @@ -57,15 +65,23 @@ def test_descriptive_name(collection): - Any special conditions or edge cases """ # Arrange - Insert test data - collection.insert_one({"name": "Alice", "age": 30}) + collection.insert_one({"_id": 0, "a": 1, "b": 2}) - # Act - Execute the operation being tested - result = collection.find({"name": "Alice"}) + # Execute the operation being tested, use runCommand format + result = execute_command(collection, {"find": collection.name, "filter": {"a": 1}}) - # Assert - Verify expected behavior - assert len(list(result)) == 1 + # Assert expected behavior, don't use plain assert for consistent failure log format + # Assert whole output when possible, to catch all unexpected regressions + expected = [{"_id": 0, "a": 1, "b": 2}] + assertSuccess(result, expected) ``` +### Test Case Guidelines + +- Each test function defines one test case +- One assertion per test function +- Use execute_command for the operation under test (setup may use driver methods) + ### Naming Conventions - **Test functions**: `test_` 
@@ -131,10 +147,10 @@ The framework provides three main fixtures: collection.insert_one({"name": "Alice"}) # Act - Execute operation - result = collection.find_one({"name": "Alice"}) + result = execute_command(collection, {"find": collection.name, "filter": {"name": "Alice"}}) # Assert - Verify results - assert result["name"] == "Alice" + assertSuccess(result, {"name": "Alice"}) # Collection automatically dropped after test ``` diff --git a/docs/testing/TEST_COVERAGE.md b/docs/testing/TEST_COVERAGE.md new file mode 100644 index 0000000..3bd2afd --- /dev/null +++ b/docs/testing/TEST_COVERAGE.md @@ -0,0 +1,355 @@ +# DocumentDB Comprehensive Test Coverage Guidelines + +**Purpose**: Define comprehensive test coverage requirements for DocumentDB features to ensure complete validation. + +--- + +## Core Testing Principles + +### 1. Data Type Coverage +**Rule**: Every command/operator input must be tested with ALL DocumentDB data types. Use constants and datasets from `framework/test_constants.py` for consistent test values. + +**Standard Data Types** (non-deprecated BSON types): +- **Numeric**: double, int, long, decimal128 → `NUMERIC`, `NUMERIC_INT32`, `NUMERIC_INT64`, `NUMERIC_DOUBLE`, `NUMERIC_DECIMAL128` +- **String**: string +- **Boolean**: bool +- **Date**: date +- **Null**: null +- **Object**: object +- **Array**: array +- **Binary data**: binData +- **ObjectId**: objectId +- **Regular Expression**: regex +- **JavaScript**: javascript +- **Timestamp**: timestamp +- **MinKey**: minKey +- **MaxKey**: maxKey + +**Type Test Matrix**: +- Valid types (should succeed) +- Invalid types (should fail with appropriate error code) +- Type conversion behavior +- Type precedence in operations + +--- + +### 2. Arithmetic Operator Coverage +**Rule**: Arithmetic operators must test all numeric type combinations and edge cases. 
+ +**Multi-Input Operators** (e.g., $add, $subtract, $multiply, $divide, $mod): + +**Numeric Type Combinations**: +``` +double + double → double +double + int → double +double + long → double +double + decimal128 → decimal128 +int + int → int +int + long → long +int + decimal128 → decimal128 +long + long → long +long + decimal128 → decimal128 +decimal128 + decimal128 → decimal128 +``` + +**Single-Input Operators** (e.g., $ln, $log10, $exp, $abs, $ceil, $floor, $sqrt, $trunc): + +**Input → Output Type Conversion**: +``` +double → double +int → double (for operators that produce non-integer results like $ln, $exp, $sqrt) +int → int (for operators that preserve integers like $abs, $ceil, $floor, $trunc) +long → double (for operators that produce non-integer results) +long → long (for operators that preserve integers) +decimal128 → decimal128 +``` +Note: Check each operator's specific return type rules. The general pattern is: decimal128 input always returns decimal128; other numeric types return double unless the operator preserves integer types. + +Note: Distinguish between fractional doubles (2.5) and whole-number doubles (3.0). Some operators coerce whole-number doubles to int, producing different behavior than fractional doubles. 
+ +**Edge Cases for Arithmetic**: +- **NaN handling**: operation with NaN → NaN +- **Infinity handling**: + - number + Infinity → Infinity + - Infinity + Infinity → Infinity + - Infinity + (-Infinity) → NaN + - -Infinity + (-Infinity) → -Infinity +- **Overflow**: `INT32_MAX` + 1 → long, `INT64_MAX` + 1 → double +- **Underflow**: `INT32_MIN` - 1 → long, `INT64_MIN` - 1 → double +- **Boundary adjacency**: `INT32_MIN_PLUS_1`, `INT64_MIN_PLUS_1` (verify no off-by-one errors) +- **Sign handling**: positive, negative, zero +- **Negative zero**: `DOUBLE_NEGATIVE_ZERO` → verify behavior (some operators normalize to `0.0`, others preserve `-0.0`); `DECIMAL128_NEGATIVE_ZERO` → verify; `NumberDecimal("-0E+N")` and `NumberDecimal("-0E-N")` → verify exponent preservation +- **Special values**: MinKey, MaxKey combinations +- **Two's complement asymmetry (single-input operators)**: `INT32_MIN` has no positive int counterpart → must promote to long; `INT64_MIN` has no positive long counterpart → verify overflow/error behavior +- **Double precision boundaries**: `DOUBLE_NEAR_MAX`, `DOUBLE_MIN_SUBNORMAL`, `DOUBLE_MIN_NEGATIVE_SUBNORMAL`, `DOUBLE_NEAR_MIN`, `DOUBLE_NEGATIVE_ZERO` → use `NUMERIC_DOUBLE` from `test_constants.py` + +--- + +### 3. Expression Type Coverage +**Rule**: Expression types and field paths must be tested at two levels: thorough framework-level tests (under `expressions/`), and per-operator smoke tests (under each operator's folder). + +**Expression Types**: +- **Literal**: `1`, `"hello"`, `true` +- **Field**: `"$x"`, `"$a.b"` +- **System Variables**: `$$ROOT`, `$$CURRENT`, `$$REMOVE` +- **Expression operator**: `{$abs: -1}`, `{$add: [1, 2]}` +- **Array expression**: `["$x", "$y"]`, `[{$abs: -1}]` +- **Object expression**: `{a: "$x"}`, `{a: {$abs: -1}}` + +#### Framework-Level Tests (under `expressions/`) +Tests the expression parser/evaluator mechanics. Run once with a representative operator. 
+ +**Embedding/Nesting (thorough)**: +- Expression in object: `{a: {$abs: -1}}` +- Expression in array: `[{$abs: -1}]` +- Nested expressions: `{$add: [{$abs: "$x"}, {$abs: "$y"}]}` +- Deep nesting: `{$add: [1, {$abs: {$ceil: {$sqrt: "$x"}}}]}` +- Complex: `{a: [{$add: [1, {$abs: "$x"}]}, {b: {$ceil: "$y"}}]}` + +**Field Path Resolution (thorough)**: +- Simple: `"$a"` on `{a: 1}` +- Nested object: `"$a.b"` on `{a: {b: 1}}` +- Composite array: `"$a.b"` on `{a: [{b: 1}, {b: 2}]}` +- Array index: `"$a.0.b"` on `{a: [{b: 1}, {b: 2}]}` +- Deep nested: `"$a.b.c.d"` on `{a: {b: [{c: [{d: 1}]}]}}` +- Non-existent: `"$missing"` → null +- Non-existent nested: `"$x.y.0"` on `{}` +- Array index path in expression context: `"$a.0.b"` — valid in filter query, verify behavior in aggregation expressions + +**System Variables (thorough)**: +- `$$ROOT` in various nesting contexts +- `$$CURRENT` equivalence to field paths +- `$$REMOVE` in conditional branches +- `$let` with complex variable definitions + +#### Per-Operator Tests (under each operator's folder) +Each operator must test these because behavior differs per operator and per input: + +**Expression type smoke (one test per type)**: verify the operator accepts literal, field, expression operator, array expression input, object expression input, and composite array input: +- With array expression input: `{$add: [["$x", "$y"]]}` — array containing field references +- With object expression input: `{$add: {a: "$x"}}` — object with field reference values +- With composite array input: doc `{a: [{b: 1}, {b: 2}]}`, expression `{$add: "$a.b"}` — field path resolving to array from array-of-objects + +**`$missing` field behavior**: handling differs between operators. 
Must test per operator and per input position + +**Array index paths in expression context**: `"$a.0.b"` — verify whether the operator supports this for `{"a": {"0": {"b": 1}}}` or `{"a": [{"b": 1}, {"b": 2}]}` + +**System variables**: only test if the official documentation says the operator supports them + +--- + +### 4. Argument Handling +**Rule 1**: Test various argument counts and formats. + +**Argument Count Variations**: +- **No arguments**: `{$add: []}` +- **Single argument**: `{$add: [1]}` and `{$add: 1}` +- **Two arguments**: `{$add: [1, 2]}` +- **Multiple arguments**: `{$add: [1, 2, 3, 4]}` +- **Argument of array type**: `{$add: [[1, 2, 3]]}` + +**Rule 2**: Each input position must be tested independently against all applicable rules. Different input positions may accept different types. + +**Per-Input-Position Coverage**: +- **Data types**: test every valid and invalid data type per position +- **Expression types**: literal, field, expression operator, array expression, object expression per position +- **All other applicable rules** from this document (edge cases, special values, etc.) + +**Examples**: +- `$add`: 2 kinds of input — 1st accepts date or numeric; 2nd and beyond accept numeric only. +- `$sum`: 1 kind of input — all inputs are numeric, test rules once on any single input position (no need to repeat for each position). +- `$subtract`: 2 inputs, each must be tested with every data type, expression type, and applicable rules independently. +- `$cond`: 3 inputs (condition, then, else) — each has different valid types and semantics. + +**Rule 3**: Test correlations between inputs that interact. Only test meaningful combinations where both inputs are valid types but their interaction produces different behavior. 
+ +**Correlation Testing**: +- Identify which inputs interact (affect each other's behavior or output) +- Test valid cross-input combinations that produce different behavior +- Skip combinations where one input doesn't affect the other's semantics +- Skip (invalid, invalid) pairs — different engines may parse inputs in different order, returning different error codes for the same (invalid, invalid) combination. Per-input tests already verify each invalid type is rejected. + +**Examples**: +- `$add`: 1st is date → test 2nd with all numeric types (int/long/double/decimal128). No need to test 2nd with invalid types again (already covered by per-input testing). +- `$unwind`: `path` and `includeArrayIndex` can conflict (same path) → test permutations. `preserveNullAndEmptyArrays` has no significant interaction with the other two → no correlation tests needed. + +--- + +### 5. Date Arithmetic Coverage +**Rule**: Date operations must test numeric additions and special cases. + +**Date Test Cases**: +- **Date + numeric types**: ISODate + int/long/double/decimal128 +- **Rounding behavior**: + - Date + 0.5 (rounds down) + - Date + 0.6 (rounds up) +- **Invalid combinations**: + - Date + Date (should fail) + - Date + Infinity (should fail) + - Date + NaN (should fail) + - Date + non-numeric types (should fail) +- **Overflow**: Date + LONG_MAX (should fail) + +--- + +### 6. Error Code Validation +**Rule**: Invalid operations must return correct error codes. Only assert on error codes, not error messages — messages are not part of the spec and may change between versions. + +**Common Error Codes**: +- **14**: Type mismatch +- **16554**: Type mismatch (pre-8.0) +- **15**: Invalid date arithmetic +- **16612**: Multiple dates in operation +- **16555**: Division by zero +- **28714**: Overflow in conversion + +**Error Test Pattern**: +``` +For each invalid_type in [string, object, array, ...]: + Test operation with invalid_type fails with error code X +``` + +--- + +### 7. 
Decimal128 Precision Coverage +**Rule**: Decimal128 operations must test precision boundaries. Use `NUMERIC_DECIMAL128` and individual constants from `test_constants.py` (e.g., `DECIMAL128_MAX`, `DECIMAL128_MIN`, `DECIMAL128_SMALL_EXPONENT`). + +**Decimal128 Test Cases**: +- **Precision boundaries**: `DECIMAL128_MAX`, `DECIMAL128_MIN`, `DECIMAL128_SMALL_EXPONENT`, `DECIMAL128_LARGE_EXPONENT`, `DECIMAL128_ZERO`, `DECIMAL128_NEGATIVE_ZERO` +- **High precision**: Results with >35 digits +- **Exponent boundaries**: Maximum and minimum exponents +- **Rounding behavior**: Precision loss scenarios +- **Special values**: Decimal128 Infinity, NaN + +--- + +### 8. Null Field Handling +**Rule**: Test null propagation and missing field behavior. + +**Null Patterns**: +- **Null propagation**: operation(value, null) → null +- **Null + Null**: null + +--- + +### 9. Type Conversion Coverage +**Rule**: Test implicit type conversions and precedence. + +**Type Conversion Matrix**: +``` +int + int → int +int + long → long +int + double → double +int + decimal128 → decimal128 +long + long → long +long + double → double +long + decimal128 → decimal128 +double + double → double +double + decimal128 → decimal128 +decimal128 + decimal128 → decimal128 +``` + +**Conversion Rules**: +- Decimal128 has highest precedence +- Double has second highest precedence +- Long has third highest precedence +- Int has lowest precedence + +--- + +### 10. Numeric Equivalence in Grouping/Comparison +**Rule**: Test that numerically equivalent values across types are treated as identical for grouping, matching, and deduplication. 
+ +**Equivalence Groups**: +- `NumberInt(1)`, `NumberLong(1)`, `1.0`, `NumberDecimal("1")` → same group +- `NumberInt(0)`, `NumberLong(0)`, `0.0`, `NumberDecimal("0")` → same group +- `false` vs `0`, `true` vs `1` → **NOT** equivalent (BSON type distinction) + +**Applies to**: `$group`, `$match`, `$lookup`, `$addToSet`, `$setUnion`, `$setIntersection`, indexes, `$eq`/`$ne` comparisons, `distinct` + +--- + +### 11. BSON Type Distinction +**Rule**: Test that values of different BSON types are treated as distinct even when they appear equivalent in some languages. + +**Key Distinctions**: +- `false` vs `NumberInt(0)` → distinct +- `true` vs `NumberInt(1)` → distinct +- `null` vs `$missing` → check per-operator (some treat as same, some don't) +- `""` (empty string) vs `null` → distinct + +**Applies to**: any context involving comparison, grouping, deduplication, or matching + +--- + +### 12. Expression Operator in Pipeline Contexts +**Rule**: Each expression operator must have one test case in each pipeline context. When generating tests for an operator (e.g., `$add`), create one test case per context in the corresponding stage/feature folder. 
+ +**Pipeline Contexts** (one test case per operator per context): +- In `core/operator/aggregation/stages/project`: `{$project: {result: {$op: "$field"}}}` +- In `core/operator/aggregation/stages/addFields`: `{$addFields: {result: {$op: "$field"}}}` +- In `core/operator/aggregation/stages/match` with `$expr`: `{$match: {$expr: {$gt: [{$op: "$field"}, value]}}}` +- In `core/operator/aggregation/stages/group` expression: `{$group: {_id: null, result: {$max: {$op: "$field"}}}}` and `{$group: {_id: {$max: {$op: "$field"}}}}` +- Don't need to add for every operator in find filter $expr: (should have same behavior with aggregation with $expr) +- Don't need to add for every operator in `find()` computed projection: (should have same behavior with aggregation) +- Don't need to add for every operator in `core/operator/aggregation/stages/set`: (alias for `$addFields`, separate code path) +- Don't need to add for every operator in `$lookup` and `$facet` pipeline: (too deep nesting) + +**Example**: generating `$add` tests adds test cases in these files: +- `stages/project/test_operators_in_project.py` +- `stages/addFields/test_operators_in_addFields.py` +- `stages/match/test_operators_in_match_expr.py` +- `stages/group/test_operators_in_group.py` + +**Applies to**: all expression operators (`$abs`, `$add`, `$ceil`, `$floor`, `$sqrt`, `$concat`, etc.) 
+ +--- + +## Test Category Checklist + +For any DocumentDB feature, ensure coverage of: + +- [ ] **Argument handling**: empty, single, multiple arguments; per-input-position coverage of types, expressions, and applicable rules +- [ ] **Input correlation**: meaningful cross-input combinations where inputs interact; skip redundant invalid-type cross-products +- [ ] **Expression types (smoke)**: one test per type — literal, field, expression operator, array expression input (`[["$x", "$y"]]`), object expression input (`{a: "$x"}`) +- [ ] **`$missing` field behavior**: per operator, per input position +- [ ] **Array index paths**: `$a.0.b` in expression context — verify validity outside filter queries +- [ ] **Null/$missing propagation**: per operator, per input position — short-circuit vs propagate vs ignore +- [ ] **NaN handling**: NaN propagation and combinations +- [ ] **Infinity handling**: Infinity, -Infinity combinations +- [ ] **Type validation**: all valid and invalid types +- [ ] **Date arithmetic**: date operations and edge cases (if applicable) +- [ ] **Edge cases**: boundary values, special combinations +- [ ] **Field lookup**: simple, nested, array, non-existent, composite, composite array +- [ ] **Sign handling**: positive, negative, zero +- [ ] **Type conversion**: all numeric type combinations +- [ ] **Overflow handling**: `INT32_MAX`, `INT64_MAX` boundaries +- [ ] **Underflow handling**: `INT32_MIN`, `INT64_MIN` boundaries +- [ ] **Decimal128 precision**: high precision, boundaries (if applicable) +- [ ] **Error codes**: correct error codes for invalid operations +- [ ] **Numeric equivalence**: equivalent values across numeric types grouped/matched correctly (if applicable) +- [ ] **BSON type distinction**: different BSON types treated as distinct (if applicable) +- [ ] **Pipeline stage interaction**: interaction with preceding/following stages (if pipeline stage) +- [ ] **Pipeline contexts**: one test case per operator per context — $project, 
$addFields, $match+$expr, $group (if expression operator) +- [ ] **System variables**: $$ROOT, $$CURRENT, $$REMOVE, $let — only if official documentation says supported +- [ ] **Negative zero**: `DOUBLE_NEGATIVE_ZERO` and `DECIMAL128_NEGATIVE_ZERO` behavior (if numeric operator) +- [ ] **Double precision boundaries**: `DOUBLE_NEAR_MAX`, `DOUBLE_MIN_SUBNORMAL`, `DOUBLE_NEAR_MIN` (if accepts double) + +--- + +## Standard Test Datasets + +All test constants and datasets are defined in `framework/test_constants.py`. Import from there — do not duplicate values. + +**Key datasets**: +- `NUMERIC` — all numeric boundary values across int32, int64, double, float, decimal128 +- `NUMERIC_INT32`, `NUMERIC_INT64`, `NUMERIC_DOUBLE`, `NUMERIC_FLOAT`, `NUMERIC_DECIMAL128` — per-type lists +- `NEGATIVE_NUMERIC`, `ZERO_NUMERIC`, `POSITIVE_NUMERIC` — sign-grouped lists +- `NOT_A_NUMBER` — `float("nan")`, `Decimal128("nan")` +- Individual constants: `INT32_MIN`, `INT32_MAX`, `INT64_MIN`, `INT64_MAX`, `DOUBLE_NEGATIVE_ZERO`, `DECIMAL128_MAX`, `DECIMAL128_MIN`, `MISSING`, etc. + +**Not yet in `test_constants.py`** (add as needed): +- Date dataset (ISODate values) +- Non-numeric dataset (string, object, array, BinData, ObjectId, bool, Timestamp, MinKey, MaxKey, UUID) + +--- diff --git a/docs/testing/TEST_FORMAT.md b/docs/testing/TEST_FORMAT.md new file mode 100644 index 0000000..17734f7 --- /dev/null +++ b/docs/testing/TEST_FORMAT.md @@ -0,0 +1,125 @@ +# Test Format Guide + +## Test Structure + +Every API test follows: Setup → Execute → Assert. 
+ ```python +def test_descriptive_name(collection): + """Clear description of what this test validates.""" + # Setup (insert documents if needed) + collection.insert_many([{"_id": 0, "a": 1}, {"_id": 1, "a": 2}]) + + # Execute — always use runCommand format + result = execute_command(collection, { + "find": collection.name, + "filter": {"a": 1} + }) + + # Assert — use framework assertion helpers + assertSuccess(result, [{"_id": 0, "a": 1}]) +``` + +## Naming + +**Files:** `test_<feature>_<aspect>.py` — files in feature subfolders must include the feature name. +``` +✅ /tests/aggregate/unwind/test_unwind_path.py +❌ /tests/aggregate/unwind/test_path.py +``` + +**Functions:** `test_<description>` — descriptive, self-documenting. +``` +✅ test_find_with_gt_operator, test_unwind_preserves_null_arrays +❌ test_1, test_query, test_edge_case +``` + +## Assertions + +Use helpers from `framework.assertions`, not plain `assert`: + +```python +# assertResult — parametrized tests mixing success and error cases +assertResult(result, expected=5) # checks cursor.firstBatch == [{"result": 5}] +assertResult(result, error_code=16555) # checks error code only + +# assertSuccess — raw command output +assertSuccess(result, [{"_id": 0, "a": 1}]) +assertSuccess(result, expected, ignore_order=True) + +# assertFailureCode — error cases (only check code, not message) +assertFailureCode(result, 14) +``` + +**One assertion per test function.** Split multiple assertions into separate tests. + +## Fixtures + +- `collection` — most common. Auto cleanup after test. Insert documents in test body. +- `database_client` — when you need multiple collections or database-level ops. Auto dropped after test. +- `engine_client` — raw client access. + +## Execute Command + +Always use `execute_command()` with runCommand format to get test result, not driver methods. Setups can use methods. 
+ +```python +# ✅ runCommand format +result = execute_command(collection, {"find": collection.name, "filter": {"a": 1}}) + +# ❌ Driver methods +result = collection.find({"a": 1}) +``` + +## Helper Functions + +Avoid deep helper function chains. One layer of abstraction on top of `execute_command()` is acceptable, don't add more abstraction layers unless justified. + +```python +# ✅ Good: execute_expression wraps execute_command with aggregate pipeline boilerplate +result = execute_expression(collection, {"$add": [1, 2]}) + +# ❌ Bad: trivial wrappers that just save a few characters add indirection for no clarity gain +# result = execute_operator(collection, "$add", [1, 2]) +``` + +Keep helpers in `utils/` at each test level. Helpers should reduce meaningful boilerplate (e.g., building an aggregate pipeline), not just shorten a single line. + +Minimize helper scope — one helper should do one thing. If a helper has many if/else branches handling different cases, split it into separate helpers at a lower folder level. 
+ +## Parametrized Tests + +Use `@pytest.mark.parametrize` with dataclasses for operators with many test cases: + +```python +@dataclass(frozen=True) +class DivideTest(BaseTestCase): + dividend: Any = None + divisor: Any = None + +DIVIDE_TESTS: list[DivideTest] = [ + DivideTest("int32", dividend=10, divisor=2, expected=5.0, msg="Should divide int32 values"), + DivideTest("null_divisor", dividend=10, divisor=None, expected=None, msg="Should return null when divisor is null"), + DivideTest("string_err", dividend=10, divisor="string", error_code=TYPE_MISMATCH_ERROR, msg="Should reject string"), +] + +@pytest.mark.parametrize("test", DIVIDE_TESTS, ids=lambda t: t.id) +def test_divide(collection, test): + """Test $divide operator.""" + result = execute_expression(collection, {"$divide": [test.dividend, test.divisor]}) + assertResult(result, expected=test.expected, error_code=test.error_code, msg=test.msg) +``` + +- `BaseTestCase` (from `framework.test_case`) provides `id`, `expected`, `error_code`, `msg` — extend it per operator +- Shared helpers/dataclasses live in `utils/` at each level +- `msg` is **required** — describes expected behavior, not input +- Use constants from `framework.test_constants` (`INT32_MAX`, `FLOAT_NAN`, etc.) and `framework.error_codes` (`TYPE_MISMATCH_ERROR`, etc.) 
+ +## Validation + +A pytest hook auto-validates during collection: +- Files must match `test_*.py` (except `__init__.py`) +- Test functions must have docstrings +- Must use assertion helpers, not plain `assert` +- One assertion per test function +- Must use `execute_command()` or helpers from utils diff --git a/documentdb_tests/compatibility/tests/core/operator/expressions/arithmetic/divide/test_operator_divide.py b/documentdb_tests/compatibility/tests/core/operator/expressions/arithmetic/divide/test_operator_divide.py new file mode 100644 index 0000000..e22a042 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/expressions/arithmetic/divide/test_operator_divide.py @@ -0,0 +1,736 @@ +import math +from dataclasses import dataclass +from typing import Any + +import pytest +from bson import Decimal128, Int64 + +from documentdb_tests.compatibility.tests.core.operator.expressions.utils.utils import ( + execute_expression, + execute_expression_with_insert, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import DIVIDE_BY_ZERO_ERROR, TYPE_MISMATCH_ERROR +from documentdb_tests.framework.test_case import BaseTestCase +from documentdb_tests.framework.test_constants import ( + DECIMAL128_HALF, + DECIMAL128_INFINITY, + DECIMAL128_JUST_ABOVE_HALF, + DECIMAL128_JUST_BELOW_HALF, + DECIMAL128_MAX, + DECIMAL128_MIN, + DECIMAL128_NAN, + DECIMAL128_NEGATIVE_INFINITY, + DECIMAL128_SMALL_EXPONENT, + DOUBLE_HALF, + DOUBLE_JUST_ABOVE_HALF, + DOUBLE_JUST_BELOW_HALF, + DOUBLE_MAX_SAFE_INTEGER, + DOUBLE_MIN_SUBNORMAL, + DOUBLE_NEAR_MAX, + DOUBLE_NEAR_MIN, + DOUBLE_ONE_AND_HALF, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT32_MAX, + INT32_MAX_MINUS_1, + INT32_MIN, + INT32_MIN_PLUS_1, + INT64_MAX, + INT64_MAX_MINUS_1, + INT64_MIN, + INT64_MIN_PLUS_1, + MISSING, +) + + +@dataclass(frozen=True) +class DivideTest(BaseTestCase): + """Test case for $divide operator.""" + + dividend: Any = None + 
divisor: Any = None + + +DIVIDE_TESTS: list[DivideTest] = [ + # Same type operations + DivideTest( + "same_type_int32", dividend=10, divisor=2, expected=5.0, msg="Should divide int32 values" + ), + DivideTest( + "same_type_int64", + dividend=Int64(10), + divisor=Int64(2), + expected=5.0, + msg="Should divide int64 values", + ), + DivideTest( + "same_type_double", + dividend=10.0, + divisor=2.0, + expected=5.0, + msg="Should divide double values", + ), + DivideTest( + "same_type_decimal", + dividend=Decimal128("10"), + divisor=Decimal128("2"), + expected=Decimal128("5"), + msg="Should divide decimal128 values", + ), + # Mixed numeric types + DivideTest( + "int32_int64", + dividend=10, + divisor=Int64(2), + expected=5.0, + msg="Should divide int32 by int64", + ), + DivideTest( + "int32_double", dividend=10, divisor=2.0, expected=5.0, msg="Should divide int32 by double" + ), + DivideTest( + "int32_decimal", + dividend=10, + divisor=Decimal128("2"), + expected=Decimal128("5"), + msg="Should divide int32 by decimal128", + ), + DivideTest( + "int64_double", + dividend=Int64(10), + divisor=2.0, + expected=5.0, + msg="Should divide int64 by double", + ), + DivideTest( + "int64_decimal", + dividend=Int64(10), + divisor=Decimal128("2"), + expected=Decimal128("5"), + msg="Should divide int64 by decimal128", + ), + DivideTest( + "double_decimal", + dividend=10.0, + divisor=Decimal128("2"), + expected=Decimal128("5.0000000000000"), + msg="Should divide double by decimal128", + ), + # Basic division operations + DivideTest( + "evenly_divisible", + dividend=20, + divisor=4, + expected=5.0, + msg="Should return exact result for even division", + ), + DivideTest( + "repeating_decimal", + dividend=10, + divisor=3, + expected=pytest.approx(3.333333333333333), + msg="Should return repeating decimal", + ), + DivideTest( + "hundred_div_seven", + dividend=100, + divisor=7, + expected=pytest.approx(14.285714285714286), + msg="Should return repeating decimal for 100/7", + ), + DivideTest( 
+ "one_div_two", dividend=1, divisor=2, expected=0.5, msg="Should return fractional result" + ), + DivideTest( + "smaller_dividend", + dividend=5, + divisor=10, + expected=0.5, + msg="Should return fraction when dividend < divisor", + ), + DivideTest( + "one_div_ten", dividend=1, divisor=10, expected=0.1, msg="Should return 0.1 for 1/10" + ), + # Negative numbers + DivideTest( + "negative_dividend", + dividend=-10, + divisor=2, + expected=-5.0, + msg="Should return negative when dividend is negative", + ), + DivideTest( + "negative_divisor", + dividend=10, + divisor=-2, + expected=-5.0, + msg="Should return negative when divisor is negative", + ), + DivideTest( + "both_negative", + dividend=-10, + divisor=-2, + expected=5.0, + msg="Should return positive when both are negative", + ), + DivideTest( + "negative_seventeen", + dividend=-17, + divisor=5, + expected=-3.4, + msg="Should handle negative non-even division", + ), + DivideTest( + "negative_one_div_two", + dividend=-1, + divisor=2, + expected=-0.5, + msg="Should return negative fraction", + ), + # Zero dividend + DivideTest( + "zero_dividend", + dividend=0, + divisor=5, + expected=0.0, + msg="Should return 0 when dividend is zero", + ), + DivideTest( + "zero_dividend_negative_divisor", + dividend=0, + divisor=-5, + expected=-0.0, + msg="Should return -0.0 for 0 divided by negative", + ), + DivideTest( + "zero_double", + dividend=0.0, + divisor=3.0, + expected=0.0, + msg="Should return 0.0 for double zero dividend", + ), + # Fractional operations + DivideTest( + "fractional_dividend", + dividend=10.5, + divisor=3.0, + expected=3.5, + msg="Should handle fractional dividend", + ), + DivideTest( + "fractional_divisor", + dividend=10.0, + divisor=2.5, + expected=4.0, + msg="Should handle fractional divisor", + ), + DivideTest( + "both_fractional", + dividend=10.5, + divisor=2.5, + expected=4.2, + msg="Should handle both fractional operands", + ), + DivideTest( + "small_fractional", + dividend=5.5, + divisor=2.2, + 
expected=2.5, + msg="Should handle small fractional operands", + ), + # Constant-based boundary tests + # Int32 boundaries + DivideTest( + "int32_max_dividend", + dividend=INT32_MAX, + divisor=10, + expected=214748364.7, + msg="Should handle INT32_MAX as dividend", + ), + DivideTest( + "int32_max_minus_1_dividend", + dividend=INT32_MAX_MINUS_1, + divisor=10, + expected=214748364.6, + msg="Should handle INT32_MAX-1 as dividend", + ), + DivideTest( + "int32_min_dividend", + dividend=INT32_MIN, + divisor=10, + expected=-214748364.8, + msg="Should handle INT32_MIN as dividend", + ), + DivideTest( + "int32_min_plus_1_dividend", + dividend=INT32_MIN_PLUS_1, + divisor=10, + expected=-214748364.7, + msg="Should handle INT32_MIN+1 as dividend", + ), + # Int64 boundaries + DivideTest( + "int64_max_dividend", + dividend=INT64_MAX, + divisor=Int64(10), + expected=pytest.approx(9.223372036854776e17), + msg="Should handle INT64_MAX as dividend", + ), + DivideTest( + "int64_max_minus_1_dividend", + dividend=INT64_MAX_MINUS_1, + divisor=Int64(10), + expected=pytest.approx(9.223372036854776e17), + msg="Should handle INT64_MAX-1 as dividend", + ), + DivideTest( + "int64_min_dividend", + dividend=INT64_MIN, + divisor=Int64(10), + expected=pytest.approx(-9.223372036854776e17), + msg="Should handle INT64_MIN as dividend", + ), + DivideTest( + "int64_min_plus_1_dividend", + dividend=INT64_MIN_PLUS_1, + divisor=Int64(10), + expected=pytest.approx(-9.223372036854776e17), + msg="Should handle INT64_MIN+1 as dividend", + ), + # Double boundaries + DivideTest( + "double_min_subnormal_dividend", + dividend=DOUBLE_MIN_SUBNORMAL, + divisor=2, + expected=pytest.approx(2.5e-324), + msg="Should handle smallest subnormal double", + ), + DivideTest( + "double_near_min_divisor", + dividend=1, + divisor=DOUBLE_NEAR_MIN, + expected=pytest.approx(1e308), + msg="Should handle near-min double as divisor", + ), + DivideTest( + "double_near_max_dividend", + dividend=DOUBLE_NEAR_MAX, + divisor=2, + 
expected=pytest.approx(5e307), + msg="Should handle near-max double as dividend", + ), + DivideTest( + "double_max_safe_integer", + dividend=DOUBLE_MAX_SAFE_INTEGER, + divisor=2, + expected=4503599627370496.0, + msg="Should handle max safe integer as dividend", + ), + # Decimal128 boundaries + DivideTest( + "decimal128_max_dividend", + dividend=DECIMAL128_MAX, + divisor=Decimal128("2"), + expected=Decimal128("5.000000000000000000000000000000000E+6144"), + msg="Should handle DECIMAL128_MAX as dividend", + ), + DivideTest( + "decimal128_min_dividend", + dividend=DECIMAL128_MIN, + divisor=Decimal128("2"), + expected=Decimal128("-5.000000000000000000000000000000000E+6144"), + msg="Should handle DECIMAL128_MIN as dividend", + ), + DivideTest( + "decimal128_small_exponent", + dividend=DECIMAL128_SMALL_EXPONENT, + divisor=Decimal128("2"), + expected=Decimal128("5E-6144"), + msg="Should handle small exponent decimal128", + ), + # Infinity constants + DivideTest( + "float_inf_dividend", + dividend=FLOAT_INFINITY, + divisor=2, + expected=FLOAT_INFINITY, + msg="Should return infinity when dividing infinity", + ), + DivideTest( + "float_inf_divisor", + dividend=10, + divisor=FLOAT_INFINITY, + expected=0.0, + msg="Should return 0 when dividing by infinity", + ), + DivideTest( + "float_neg_inf_dividend", + dividend=FLOAT_NEGATIVE_INFINITY, + divisor=2, + expected=FLOAT_NEGATIVE_INFINITY, + msg="Should return -infinity when dividing -infinity", + ), + DivideTest( + "float_neg_inf_divisor", + dividend=10, + divisor=FLOAT_NEGATIVE_INFINITY, + expected=-0.0, + msg="Should return -0.0 when dividing by -infinity", + ), + DivideTest( + "neg_inf_dividend_neg_divisor", + dividend=FLOAT_NEGATIVE_INFINITY, + divisor=-2, + expected=FLOAT_INFINITY, + msg="Should return +infinity for -inf/-2", + ), + DivideTest( + "decimal128_inf_dividend", + dividend=DECIMAL128_INFINITY, + divisor=2, + expected=DECIMAL128_INFINITY, + msg="Should return decimal infinity when dividing decimal infinity", + ), + 
DivideTest( + "decimal128_neg_inf_dividend", + dividend=DECIMAL128_NEGATIVE_INFINITY, + divisor=2, + expected=DECIMAL128_NEGATIVE_INFINITY, + msg="Should return decimal -infinity when dividing decimal -infinity", + ), + DivideTest( + "decimal_neg_inf_dividend_neg_divisor", + dividend=DECIMAL128_NEGATIVE_INFINITY, + divisor=Decimal128("-2"), + expected=DECIMAL128_INFINITY, + msg="Should return decimal +infinity for -inf/-2", + ), + # NaN constants + DivideTest( + "float_nan_dividend", + dividend=FLOAT_NAN, + divisor=2, + expected=pytest.approx(math.nan, nan_ok=True), + msg="Should return NaN when dividend is NaN", + ), + DivideTest( + "float_nan_divisor", + dividend=10, + divisor=FLOAT_NAN, + expected=pytest.approx(math.nan, nan_ok=True), + msg="Should return NaN when divisor is NaN", + ), + DivideTest( + "both_nan", + dividend=FLOAT_NAN, + divisor=FLOAT_NAN, + expected=pytest.approx(math.nan, nan_ok=True), + msg="Should return NaN when both are NaN", + ), + DivideTest( + "inf_div_inf", + dividend=FLOAT_INFINITY, + divisor=FLOAT_INFINITY, + expected=pytest.approx(math.nan, nan_ok=True), + msg="Should return NaN for infinity/infinity", + ), + DivideTest( + "decimal128_nan_dividend", + dividend=DECIMAL128_NAN, + divisor=2, + expected=DECIMAL128_NAN, + msg="Should return decimal NaN when dividend is decimal NaN", + ), + DivideTest( + "decimal128_nan_divisor", + dividend=10, + divisor=DECIMAL128_NAN, + expected=DECIMAL128_NAN, + msg="Should return decimal NaN when divisor is decimal NaN", + ), + # Precision + DivideTest( + "decimal_precision", + dividend=Decimal128("10"), + divisor=Decimal128("3"), + expected=Decimal128("3.333333333333333333333333333333333"), + msg="Should preserve decimal128 precision for 10/3", + ), + DivideTest( + "decimal_precision_complex", + dividend=Decimal128("100"), + divisor=Decimal128("7"), + expected=Decimal128("14.28571428571428571428571428571429"), + msg="Should preserve decimal128 precision for 100/7", + ), + DivideTest( + 
"million_div_seven", + dividend=1000000, + divisor=7, + expected=pytest.approx(142857.14285714286), + msg="Should handle large dividend with repeating result", + ), + DivideTest( + "tiny_divisor", + dividend=1, + divisor=DOUBLE_NEAR_MIN, + expected=pytest.approx(1e308), + msg="Should handle very small divisor", + ), + DivideTest( + "tiny_divisor_hundred", + dividend=1, + divisor=1e-100, + expected=1e100, + msg="Should handle tiny divisor producing large result", + ), + # Rounding edge cases + DivideTest( + "double_half_dividend", + dividend=DOUBLE_HALF, + divisor=2, + expected=0.25, + msg="Should correctly halve 0.5", + ), + DivideTest( + "double_one_and_half_dividend", + dividend=DOUBLE_ONE_AND_HALF, + divisor=3, + expected=0.5, + msg="Should correctly divide 1.5 by 3", + ), + DivideTest( + "double_just_below_half_dividend", + dividend=DOUBLE_JUST_BELOW_HALF, + divisor=2, + expected=pytest.approx(0.2499999999999997), + msg="Should preserve precision near 0.5 boundary", + ), + DivideTest( + "double_just_above_half_dividend", + dividend=DOUBLE_JUST_ABOVE_HALF, + divisor=2, + expected=pytest.approx(0.2500000005), + msg="Should preserve precision just above 0.5", + ), + DivideTest( + "decimal_half_dividend", + dividend=DECIMAL128_HALF, + divisor=Decimal128("2"), + expected=Decimal128("0.25"), + msg="Should correctly halve decimal 0.5", + ), + DivideTest( + "decimal_just_below_half_dividend", + dividend=DECIMAL128_JUST_BELOW_HALF, + divisor=Decimal128("2"), + expected=Decimal128("0.2500000000000000000000000000000000"), + msg="Should preserve decimal precision near 0.5", + ), + DivideTest( + "decimal_just_above_half_dividend", + dividend=DECIMAL128_JUST_ABOVE_HALF, + divisor=Decimal128("2"), + expected=Decimal128("0.2500000000000000000000000000000000"), + msg="Should preserve decimal precision just above 0.5", + ), + # Null and missing (MISSING constant) + DivideTest( + "null_divisor", + dividend=10, + divisor=None, + expected=None, + msg="Should return null when 
divisor is null", + ), + DivideTest( + "null_dividend", + dividend=None, + divisor=2, + expected=None, + msg="Should return null when dividend is null", + ), + DivideTest( + "missing_dividend", + dividend=MISSING, + divisor=2, + expected=None, + msg="Should return null when dividend is missing", + ), + DivideTest( + "missing_divisor", + dividend=10, + divisor=MISSING, + expected=None, + msg="Should return null when divisor is missing", + ), + DivideTest( + "both_null", + dividend=None, + divisor=None, + expected=None, + msg="Should return null when both are null", + ), + # Error cases - invalid types + DivideTest( + "string_divisor", + dividend=10, + divisor="string", + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject string divisor", + ), + DivideTest( + "string_dividend", + dividend="string", + divisor=2, + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject string dividend", + ), + DivideTest( + "boolean_divisor", + dividend=10, + divisor=True, + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject boolean divisor", + ), + DivideTest( + "boolean_dividend", + dividend=True, + divisor=2, + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject boolean dividend", + ), + DivideTest( + "array_divisor", + dividend=10, + divisor=[2, 3], + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject array divisor", + ), + DivideTest( + "array_dividend", + dividend=[2, 3], + divisor=2, + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject array dividend", + ), + DivideTest( + "object_divisor", + dividend=10, + divisor={"a": 2}, + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject object divisor", + ), + DivideTest( + "object_dividend", + dividend={"a": 2}, + divisor=2, + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject object dividend", + ), + DivideTest( + "empty_array_dividend", + dividend=[], + divisor=2, + error_code=TYPE_MISMATCH_ERROR, + msg="Should reject empty array dividend", + ), + DivideTest( + "empty_object_dividend", + dividend={}, + divisor=2, + 
@pytest.mark.parametrize("test", DIVIDE_TESTS, ids=lambda case: case.id)
def test_divide_literal(collection, test):
    """Evaluate $divide with both operands embedded in the expression itself."""
    operands = [test.dividend, test.divisor]
    result = execute_expression(collection, {"$divide": operands})
    assertResult(result, expected=test.expected, error_code=test.error_code, msg=test.msg)


@pytest.mark.parametrize(
    "test",
    # Cases whose dividend or divisor is the MISSING placeholder are left out of this variant.
    [
        case
        for case in DIVIDE_TESTS
        if not (case.dividend == MISSING or case.divisor == MISSING)
    ],
    ids=lambda case: case.id,
)
def test_divide_insert(collection, test):
    """Evaluate $divide with both operands read from fields of an inserted document."""
    document = {"dividend": test.dividend, "divisor": test.divisor}
    expression = {"$divide": ["$dividend", "$divisor"]}
    result = execute_expression_with_insert(collection, expression, document)
    assertResult(result, expected=test.expected, error_code=test.error_code, msg=test.msg)


@pytest.mark.parametrize(
    "test",
    # Only the dividend is stored in the document here, so only a MISSING dividend is excluded.
    [case for case in DIVIDE_TESTS if not case.dividend == MISSING],
    ids=lambda case: case.id,
)
def test_divide_mixed(collection, test):
    """Evaluate $divide with the dividend read from a document field and an inline divisor."""
    document = {"dividend": test.dividend}
    expression = {"$divide": ["$dividend", test.divisor]}
    result = execute_expression_with_insert(collection, expression, document)
    assertResult(result, expected=test.expected, error_code=test.error_code, msg=test.msg)
def _run_aggregate(collection, target, pipeline):
    """Send an aggregate command for ``target`` with ``pipeline`` and an empty cursor spec."""
    command = {"aggregate": target, "pipeline": pipeline, "cursor": {}}
    return execute_command(collection, command)


def build_nested_expr(value, operator, depth):
    """
    Wrap a value in repeated applications of a single operator.

    Each layer is a one-key dict whose key is the operator name prefixed
    with ``$``. With operator="abs" and depth=2 the result is
    {$abs: {$abs: value}}; with depth=0 the value is returned unchanged.

    Args:
        value: The innermost value to wrap
        operator: The operator name (without $ prefix)
        depth: Number of layers to add around the value

    Returns:
        The nested operator expression (or ``value`` itself when no layer is added)

    Example:
        >>> build_nested_expr(5, "abs", 2)
        {'$abs': {'$abs': 5}}
    """
    key = f"${operator}"
    nested = value
    for _layer in range(depth):
        nested = {key: nested}
    return nested


def execute_project(collection, project):
    """
    Run a $project over a single empty document produced by $documents.

    Args:
        collection: MongoDB collection object
        project: Fields to project. Do not include _id; this function always
            sets "_id": 0 so results carry no auto-generated values.

    Returns:
        Result from execute_command

    Example:
        >>> execute_project(collection, {"sum": {"$add": [1, 2]}})
        # Returns result with {"sum": 3} in firstBatch
    """
    projection = {**project, "_id": 0}
    stages = [{"$documents": [{}]}, {"$project": projection}]
    # The aggregate target is the literal 1 rather than a collection name.
    return _run_aggregate(collection, 1, stages)


def execute_project_with_insert(collection, document, project):
    """
    Insert a document, then run a $project over the collection.

    Args:
        collection: MongoDB collection object
        document: Document to insert before aggregating
        project: Fields to project. Do not include _id; this function always
            sets "_id": 0 so results carry no auto-generated values.

    Returns:
        Result from execute_command

    Example:
        >>> execute_project_with_insert(
        ...     collection,
        ...     {"a": 10, "b": 3},
        ...     {"quotient": {"$divide": ["$a", "$b"]}}
        ... )
        # Returns result with {"quotient": 3.33...} in firstBatch
    """
    collection.insert_one(document)
    projection = {**project, "_id": 0}
    return _run_aggregate(collection, collection.name, [{"$project": projection}])


def execute_expression(collection, expression):
    """
    Evaluate one expression against a single empty document.

    The pipeline is a $documents stage yielding one empty document followed
    by a $project that stores the expression under the "result" field.
    Useful for expressions built purely from literal values.

    Args:
        collection: MongoDB collection object
        expression: The expression to evaluate (e.g., {"$add": [1, 2]})

    Returns:
        Result from execute_command; the projected document has the single
        field "result" (with _id excluded).

    Example:
        >>> execute_expression(collection, {"$add": [1, 2]})
        # Returns result with {"result": 3} in firstBatch
    """
    projection = {"_id": 0, "result": expression}
    stages = [{"$documents": [{}]}, {"$project": projection}]
    return _run_aggregate(collection, 1, stages)


def execute_expression_with_insert(collection, expression, document):
    """
    Insert a document, then evaluate one expression against the collection.

    The expression is projected under the "result" field, so it may use
    field references into the inserted document.

    Args:
        collection: MongoDB collection object
        expression: The expression to evaluate (e.g., {"$divide": ["$a", "$b"]})
        document: Document to insert (e.g., {"a": 10, "b": 2})

    Returns:
        Result from execute_command; each projected document has the single
        field "result" (with _id excluded).
    """
    collection.insert_one(document)
    projection = {"_id": 0, "result": expression}
    return _run_aggregate(collection, collection.name, [{"$project": projection}])
+ +from documentdb_tests.framework import fixtures # noqa: E402 +from documentdb_tests.framework.error_codes_validator import ( # noqa: E402 + validate_error_codes_sorted, +) +from documentdb_tests.framework.test_format_validator import ( # noqa: E402 + validate_test_format, +) +from documentdb_tests.framework.test_structure_validator import ( # noqa: E402 + validate_python_files_in_tests, +) def pytest_addoption(parser): @@ -30,8 +39,7 @@ def pytest_addoption(parser): "--engine-name", action="store", default="default", - help="Optional engine identifier for metadata. " - "Example: --engine-name documentdb", + help="Optional engine identifier for metadata. " "Example: --engine-name documentdb", ) @@ -54,11 +62,11 @@ def pytest_configure(config): def engine_client(request): """ Create a MongoDB client for the configured engine. - + Session-scoped for performance - MongoClient is thread-safe and maintains an internal connection pool. This significantly improves test execution speed by eliminating redundant connection overhead. - + Per-test isolation is maintained through database_client and collection fixtures which create unique databases/collections for each test. @@ -67,7 +75,7 @@ def engine_client(request): Yields: MongoClient: Connected MongoDB client (shared across session) - + Raises: ConnectionError: If unable to connect to the database """ @@ -130,7 +138,7 @@ def collection(database_client, request, worker_id): # Generate unique collection name using framework utility full_test_id = request.node.nodeid collection_name = fixtures.generate_collection_name(full_test_id, worker_id) - + coll = database_client[collection_name] yield coll @@ -141,22 +149,46 @@ def collection(database_client, request, worker_id): def pytest_collection_modifyitems(session, config, items): """ - Combined pytest hook to validate test structure. + Combined pytest hook to validate test structure, format, and framework invariants. 
""" - errors = [] + structure_errors = [] + format_errors = {} + # Validate file structure for all files under "tests" folder if items: first_item_path = Path(items[0].fspath) if "tests" in first_item_path.parts: tests_idx = first_item_path.parts.index("tests") - tests_dir = Path(*first_item_path.parts[:tests_idx + 1]) - errors.extend(validate_python_files_in_tests(tests_dir)) - - if errors: + tests_dir = Path(*first_item_path.parts[: tests_idx + 1]) + structure_errors.extend(validate_python_files_in_tests(tests_dir)) + + # Validate test format for collected test files + seen_files = set() + for item in items: + file_path = str(item.fspath) + if file_path in seen_files: + continue + seen_files.add(file_path) + file_errors = validate_test_format(file_path) + if file_errors: + format_errors[file_path] = file_errors + + # Validate framework error code invariants + structure_errors.extend(validate_error_codes_sorted()) + + if structure_errors or format_errors: import sys - print("\n\n❌ Folder Structure Violations:", file=sys.stderr) - print("".join(errors), file=sys.stderr) - print("\nSee docs/testing/FOLDER_STRUCTURE.md for rules.\n", file=sys.stderr) + if structure_errors: + print("\n\n❌ Folder Structure Violations:", file=sys.stderr) + print("".join(structure_errors), file=sys.stderr) + print("\nSee docs/testing/FOLDER_STRUCTURE.md for rules.\n", file=sys.stderr) + + if format_errors: + print("\n❌ Test Format Violations:", file=sys.stderr) + for file_path, file_errors in format_errors.items(): + print(f"\n{file_path}:", file=sys.stderr) + print("\n".join(file_errors), file=sys.stderr) + print("\nSee docs/testing/TEST_FORMAT.md for rules.\n", file=sys.stderr) pytest.exit("Test validation failed", returncode=1) diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py new file mode 100644 index 0000000..c285b5a --- /dev/null +++ b/documentdb_tests/framework/error_codes.py @@ -0,0 +1,23 @@ +""" +Error Codes expected from the 
def validate_error_codes_sorted() -> list[str]:
    """
    Validate that error code assignments in error_codes.py are sorted by value
    and have no duplicate values.

    Only module-level statements of the form ``NAME = <integer literal>`` in the
    error_codes.py file located next to this module are inspected. Every other
    statement (docstrings, non-integer constants, booleans) is ignored.

    Returns:
        List of error messages for violations. Empty if valid. When
        error_codes.py does not exist, a single message saying so is returned.
    """
    file_path = Path(__file__).parent / "error_codes.py"
    if not file_path.exists():
        return ["Error code assignments could not be found."]

    # Decode explicitly as UTF-8 (Python's default source encoding) so the
    # result does not depend on the platform's locale encoding.
    tree = ast.parse(file_path.read_text(encoding="utf-8"), filename=str(file_path))

    # Collect (name, value, line) for every simple integer assignment, in file order.
    codes = []
    for node in ast.iter_child_nodes(tree):
        if not (isinstance(node, ast.Assign) and len(node.targets) == 1):
            continue
        target = node.targets[0]
        if not (isinstance(target, ast.Name) and isinstance(node.value, ast.Constant)):
            continue
        val = node.value.value
        # bool is a subclass of int; without this guard `FLAG = True` would be
        # treated as error code 1 and could be reported as a duplicate.
        if isinstance(val, bool) or not isinstance(val, int):
            continue
        codes.append((target.id, val, node.lineno))

    errors = []

    # Ordering: each code is compared with the one immediately before it.
    for (prev_name, prev_val, _), (curr_name, curr_val, curr_line) in zip(codes, codes[1:]):
        if curr_val < prev_val:
            errors.append(
                f" Line {curr_line}: {curr_name} = {curr_val} is out of order "
                f"(follows {prev_name} = {prev_val}). "
                f"Keep error_codes.py sorted by value."
            )

    # Check for duplicate values. Each numeric code should have exactly one constant.
    seen: dict[int, tuple[str, int]] = {}
    for name, val, lineno in codes:
        if val in seen:
            first_name, first_line = seen[val]
            errors.append(
                f" Line {lineno}: {name} = {val} duplicates {first_name} "
                f"(line {first_line}). "
                f"Reuse the existing constant or rename it to cover "
                f"the shared scope (e.g., operator family), but do not "
                f"over-generalize without confirming the code is widely shared."
            )
        else:
            seen[val] = (name, lineno)

    return errors
+ + Attributes: + id: Unique identifier for the test case + expected: Expected result value (None for error cases) + error_code: Expected error code (None for success cases) + msg: Description of expected behavior for assertion messages (required) + """ + + id: str + expected: Any = None + error_code: Optional[int] = None + msg: Optional[str] = None + + def __post_init__(self): + if self.msg is None: + raise ValueError( + f"BaseTestCase '{self.id}' must have a msg describing expected behavior" + ) diff --git a/documentdb_tests/framework/test_constants.py b/documentdb_tests/framework/test_constants.py new file mode 100644 index 0000000..ce8bb69 --- /dev/null +++ b/documentdb_tests/framework/test_constants.py @@ -0,0 +1,143 @@ +from bson import Decimal128, Int64 + +# Int32 boundary values +INT32_UNDERFLOW = -2147483649 +INT32_MIN = -2147483648 +INT32_MIN_PLUS_1 = -2147483647 +INT32_ZERO = 0 +INT32_MAX = 2147483647 +INT32_MAX_MINUS_1 = 2147483646 +INT32_OVERFLOW = 2147483648 + +# Int64 boundary values +INT64_MIN = Int64(-9223372036854775808) +INT64_MIN_PLUS_1 = Int64(-9223372036854775807) +INT64_ZERO = Int64(0) +INT64_MAX = Int64(9223372036854775807) +INT64_MAX_MINUS_1 = Int64(9223372036854775806) + +# Double boundary values +DOUBLE_MIN_NEGATIVE_SUBNORMAL = -5e-324 +DOUBLE_NEGATIVE_ZERO = -0.0 +DOUBLE_ZERO = 0.0 +DOUBLE_MIN_SUBNORMAL = 5e-324 +DOUBLE_NEAR_MAX = 1e308 +DOUBLE_NEAR_MIN = 1e-308 +DOUBLE_MAX_SAFE_INTEGER = 9007199254740992 +DOUBLE_PRECISION_LOSS = 9007199254740993 + +# Double rounding tests +DOUBLE_HALF = 0.5 +DOUBLE_ONE_AND_HALF = 1.5 +DOUBLE_TWO_AND_HALF = 2.5 +DOUBLE_NEGATIVE_HALF = -0.5 +DOUBLE_NEGATIVE_ONE_AND_HALF = -1.5 +DOUBLE_JUST_BELOW_HALF = 0.4999999999999994 +DOUBLE_JUST_ABOVE_HALF = 0.500000001 + +# Float boundary values +FLOAT_NEGATIVE_INFINITY = float("-inf") +FLOAT_INFINITY = float("inf") +FLOAT_NAN = float("nan") + +# Decimal128 boundary values +DECIMAL128_NEGATIVE_INFINITY = Decimal128("-Infinity") +DECIMAL128_MIN = 
Decimal128("-9.999999999999999999999999999999999E+6144") +DECIMAL128_ZERO = Decimal128("0") +DECIMAL128_NEGATIVE_ZERO = Decimal128("-0") +DECIMAL128_INFINITY = Decimal128("Infinity") +DECIMAL128_MAX = Decimal128("9.999999999999999999999999999999999E+6144") +DECIMAL128_LARGE_EXPONENT = Decimal128("1E+6144") +DECIMAL128_SMALL_EXPONENT = Decimal128("1E-6143") +DECIMAL128_TRAILING_ZERO = Decimal128("1.0") +DECIMAL128_MANY_TRAILING_ZEROS = Decimal128("1.00000000000000000000000000000000") +DECIMAL128_NAN = Decimal128("nan") + +# Decimal rounding tests +DECIMAL128_HALF = Decimal128("0.5") +DECIMAL128_ONE_AND_HALF = Decimal128("1.5") +DECIMAL128_TWO_AND_HALF = Decimal128("2.5") +DECIMAL128_NEGATIVE_HALF = Decimal128("-0.5") +DECIMAL128_NEGATIVE_ONE_AND_HALF = Decimal128("-1.5") +DECIMAL128_JUST_BELOW_HALF = Decimal128("0.4999999999999999999999999999999999") +DECIMAL128_JUST_ABOVE_HALF = Decimal128("0.5000000000000000000000000000000001") + +# Other constant values +MISSING = "$missing" + +# Int32 lists +NUMERIC_INT32_NEGATIVE = [INT32_UNDERFLOW, INT32_MIN] +NUMERIC_INT32_ZERO = [INT32_ZERO] +NUMERIC_INT32_POSITIVE = [INT32_OVERFLOW, INT32_MAX] +NUMERIC_INT32 = NUMERIC_INT32_NEGATIVE + NUMERIC_INT32_ZERO + NUMERIC_INT32_POSITIVE + +# Int64 lists +NUMERIC_INT64_NEGATIVE = [INT64_MIN] +NUMERIC_INT64_ZERO = [INT64_ZERO] +NUMERIC_INT64_POSITIVE = [INT64_MAX] +NUMERIC_INT64 = NUMERIC_INT64_NEGATIVE + NUMERIC_INT64_ZERO + NUMERIC_INT64_POSITIVE + +# Double lists +NUMERIC_DOUBLE_NEGATIVE = [DOUBLE_MIN_NEGATIVE_SUBNORMAL] +NUMERIC_DOUBLE_ZERO = [DOUBLE_NEGATIVE_ZERO, DOUBLE_ZERO] +NUMERIC_DOUBLE_POSITIVE = [ + DOUBLE_MIN_SUBNORMAL, + DOUBLE_NEAR_MAX, + DOUBLE_NEAR_MIN, + DOUBLE_MAX_SAFE_INTEGER, + DOUBLE_PRECISION_LOSS, +] +NUMERIC_DOUBLE = NUMERIC_DOUBLE_NEGATIVE + NUMERIC_DOUBLE_ZERO + NUMERIC_DOUBLE_POSITIVE + +# Float lists +NUMERIC_FLOAT_NEGATIVE = [FLOAT_NEGATIVE_INFINITY] +NUMERIC_FLOAT_ZERO = [] +NUMERIC_FLOAT_POSITIVE = [FLOAT_INFINITY] +NUMERIC_FLOAT = 
NUMERIC_FLOAT_NEGATIVE + NUMERIC_FLOAT_ZERO + NUMERIC_FLOAT_POSITIVE + +# Decimal128 lists +NUMERIC_DECIMAL128_NEGATIVE = [DECIMAL128_NEGATIVE_INFINITY, DECIMAL128_MIN] +NUMERIC_DECIMAL128_ZERO = [DECIMAL128_ZERO, DECIMAL128_NEGATIVE_ZERO] +NUMERIC_DECIMAL128_POSITIVE = [ + DECIMAL128_INFINITY, + DECIMAL128_MAX, + DECIMAL128_LARGE_EXPONENT, + DECIMAL128_SMALL_EXPONENT, + DECIMAL128_TRAILING_ZERO, + DECIMAL128_JUST_BELOW_HALF, + DECIMAL128_JUST_ABOVE_HALF, + DECIMAL128_MANY_TRAILING_ZEROS, +] +NUMERIC_DECIMAL128 = ( + NUMERIC_DECIMAL128_NEGATIVE + NUMERIC_DECIMAL128_ZERO + NUMERIC_DECIMAL128_POSITIVE +) + +# Combined lists +NEGATIVE_NUMERIC = ( + NUMERIC_DECIMAL128_NEGATIVE + + NUMERIC_FLOAT_NEGATIVE + + NUMERIC_INT32_NEGATIVE + + NUMERIC_INT64_NEGATIVE + + NUMERIC_DOUBLE_NEGATIVE +) + +ZERO_NUMERIC = ( + NUMERIC_DECIMAL128_ZERO + + NUMERIC_FLOAT_ZERO + + NUMERIC_INT32_ZERO + + NUMERIC_INT64_ZERO + + NUMERIC_DOUBLE_ZERO +) + +POSITIVE_NUMERIC = ( + NUMERIC_DECIMAL128_POSITIVE + + NUMERIC_FLOAT_POSITIVE + + NUMERIC_INT32_POSITIVE + + NUMERIC_INT64_POSITIVE + + NUMERIC_DOUBLE_POSITIVE +) + +NUMERIC = NUMERIC_DECIMAL128 + NUMERIC_FLOAT + NUMERIC_INT32 + NUMERIC_INT64 + NUMERIC_DOUBLE + +# NaN values +NOT_A_NUMBER = [FLOAT_NAN, DECIMAL128_NAN] diff --git a/documentdb_tests/framework/test_format_validator.py b/documentdb_tests/framework/test_format_validator.py new file mode 100644 index 0000000..d059172 --- /dev/null +++ b/documentdb_tests/framework/test_format_validator.py @@ -0,0 +1,131 @@ +""" +Test format validator to enforce test writing conventions. +""" + +from __future__ import annotations + +import ast + + +def validate_test_format(file_path: str) -> list[str]: + """ + Validate that test functions follow format conventions. 
+ + Returns: + List of error messages for violations + """ + errors = [] + + try: + with open(file_path, "r") as f: + tree = ast.parse(f.read(), filename=file_path) + except Exception: + return errors # Skip files that can't be parsed + + # First pass: collect helper functions that call execute_command + helper_functions_with_execute = set() + + # Add known helper functions from documentdb_tests.framework.utils that call execute_command + helper_functions_with_execute.update( + [ + "execute_project", + "execute_project_with_insert", + "execute_expression", + "execute_expression_with_insert", + ] + ) + + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and not node.name.startswith("test_"): + # Check if this helper function calls execute_command + has_execute = any( + isinstance(n, ast.Call) + and isinstance(n.func, ast.Name) + and n.func.id in ("execute_command", "execute_admin_command") + for n in ast.walk(node) + ) + if has_execute: + helper_functions_with_execute.add(node.name) + + # Second pass: validate test functions + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): + # Check for docstring + if not ast.get_docstring(node): + errors.append(f" Function '{node.name}' at line {node.lineno}: Missing docstring") + + # Check for plain assert statements + for n in ast.walk(node): + if isinstance(n, ast.Assert): + errors.append( + f" Function '{node.name}' at line {n.lineno}: " + f"Don't use plain 'assert'. Use assertSuccess(), assertFailure()," + f" or assertResult() instead." + ) + + # Check for multiple assertion helper calls + call_count = sum( + 1 + for n in ast.walk(node) + if isinstance(n, ast.Call) + and isinstance(n.func, ast.Name) + and n.func.id + in ( + "assertSuccess", + "assertFailure", + "assertResult", + "assertFailureCode", + "assertNaN", + ) + ) + + if call_count > 1: + errors.append( + f" Function '{node.name}' at line {node.lineno}: " + f"Multiple assertions ({call_count}). 
Use one assertion per test." + ) + + # Check for execute_command or execute_admin_command usage + has_execute_command = any( + isinstance(n, ast.Call) + and isinstance(n.func, ast.Name) + and n.func.id in ("execute_command", "execute_admin_command") + for n in ast.walk(node) + ) + + # Also check if test calls helper functions that use execute_command + has_helper_with_execute = any( + isinstance(n, ast.Call) + and isinstance(n.func, ast.Name) + and n.func.id in helper_functions_with_execute + for n in ast.walk(node) + ) + + # Check if test has a parameter that could be an executor function + # Look for parametrize decorators that pass executor functions + has_executor_param = False + for decorator in node.decorator_list: + if ( + isinstance(decorator, ast.Call) + and isinstance(decorator.func, ast.Attribute) + and decorator.func.attr == "parametrize" + ): + # Check if any argument is a list containing helper functions + for arg in decorator.args: + if isinstance(arg, ast.List): + for elt in arg.elts: + if ( + isinstance(elt, ast.Name) + and elt.id in helper_functions_with_execute + ): + has_executor_param = True + break + + if not has_execute_command and not has_helper_with_execute and not has_executor_param: + errors.append( + f" Function '{node.name}' at line {node.lineno}: " + f"Must use execute_command(), execute_admin_command(), or helper" + f" functions from documentdb_tests.framework.utils for MongoDB operations" + ) + + return errors diff --git a/documentdb_tests/framework/test_structure_validator.py b/documentdb_tests/framework/test_structure_validator.py index 2d21366..57f811f 100644 --- a/documentdb_tests/framework/test_structure_validator.py +++ b/documentdb_tests/framework/test_structure_validator.py @@ -1,6 +1,7 @@ """ Test structure validator to enforce folder organization rules. 
""" + from pathlib import Path @@ -29,11 +30,18 @@ def validate_python_files_in_tests(tests_dir: Path) -> list[str]: ) continue - if not py_file.stem.startswith(f"test_{parent_folder}"): + if not py_file.stem.startswith("test_"): + rel_path = py_file.relative_to(tests_dir.parent) + errors.append( + f"\n {rel_path}\n → Test file in /{parent_folder}/ should start with 'test_'" + f" in filename to be picked up by pytest. Non-test utilities should be moved" + f" to a utils/ or fixtures/ folder." + ) + + if f"{parent_folder}" not in py_file.stem: rel_path = py_file.relative_to(tests_dir.parent) errors.append( - f"\n {rel_path}\n → Expected pattern: test_{parent_folder}_*.py, got: {py_file.name}. " - f"Test file in /{parent_folder}/ should include feature name in filename. Non-test utilities should be moved to a utils/ or fixtures/ folder." + f"\n {rel_path}\n → Test file name should contain the parent folder name." ) return errors diff --git a/documentdb_tests/pytest.ini b/documentdb_tests/pytest.ini index 24af4e2..4c76045 100644 --- a/documentdb_tests/pytest.ini +++ b/documentdb_tests/pytest.ini @@ -7,6 +7,7 @@ python_functions = test_* # Output options addopts = + -vv --strict-markers --tb=short --color=yes @@ -36,6 +37,7 @@ markers = # Special markers smoke: Quick smoke tests for feature detection slow: Tests that take longer to execute + replica: Tests that can only run on a replica # Timeout for tests (seconds) timeout = 300