|
485 | 485 | "exclusiveMinimum": 0, |
486 | 486 | "type": "integer", |
487 | 487 | }, |
| 488 | + "use_first_found_file_for_schema_discovery": { |
| 489 | + "default": False, |
| 490 | + "description": "When enabled, the source will use the first found file for schema discovery. Helps to avoid long discovery step.", |
| 491 | + "title": "Use First Found File For Schema Discover", |
| 492 | + "type": "boolean", |
| 493 | + }, |
488 | 494 | }, |
489 | 495 | "required": ["name", "format"], |
490 | 496 | }, |
|
2114 | 2120 | } |
2115 | 2121 | ) |
2116 | 2122 | .set_expected_check_status("FAILED") |
2117 | | - .set_expected_check_error(None, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value) |
| 2123 | + .set_expected_check_error( |
| 2124 | + None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2125 | + ) |
2118 | 2126 | .set_expected_discover_error( |
2119 | | - ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value |
| 2127 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
2120 | 2128 | ) |
2121 | 2129 | .set_expected_read_error( |
2122 | | - ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value |
| 2130 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
2123 | 2131 | ) |
2124 | 2132 | ).build() |
2125 | 2133 |
|
|
2217 | 2225 | } |
2218 | 2226 | ) |
2219 | 2227 | .set_expected_check_status("FAILED") |
2220 | | - .set_expected_check_error(None, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value) |
| 2228 | + .set_expected_check_error( |
| 2229 | + None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2230 | + ) |
| 2231 | + .set_expected_discover_error( |
| 2232 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2233 | + ) |
| 2234 | + .set_expected_read_error( |
| 2235 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2236 | + ) |
| 2237 | +).build() |
| 2238 | + |
| 2239 | +recent_n_files_to_read_for_schema_discovery_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario: TestScenario[ |
| 2240 | + InMemoryFilesSource |
| 2241 | +] = ( |
| 2242 | + TestScenarioBuilder[InMemoryFilesSource]() |
| 2243 | + .set_name( |
| 2244 | + "recent_n_files_to_read_for_schema_discovery_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario" |
| 2245 | + ) |
| 2246 | + .set_config( |
| 2247 | + { |
| 2248 | + "streams": [ |
| 2249 | + { |
| 2250 | + "name": "stream1", |
| 2251 | + "format": {"filetype": "csv"}, |
| 2252 | + "globs": ["a.csv"], |
| 2253 | + "validation_policy": "Skip Record", |
| 2254 | + "recent_n_files_to_read_for_schema_discovery": 5, |
| 2255 | + "use_first_found_file_for_schema_discovery": True, |
| 2256 | + }, |
| 2257 | + { |
| 2258 | + "name": "stream2", |
| 2259 | + "format": {"filetype": "csv"}, |
| 2260 | + "globs": ["b.csv"], |
| 2261 | + "validation_policy": "Skip Record", |
| 2262 | + }, |
| 2263 | + ] |
| 2264 | + } |
| 2265 | + ) |
| 2266 | + .set_source_builder( |
| 2267 | + FileBasedSourceBuilder() |
| 2268 | + .set_files( |
| 2269 | + { |
| 2270 | + "a.csv": { |
| 2271 | + "contents": [ |
| 2272 | + ("col1", "col2"), |
| 2273 | + ("val11a", "val12a"), |
| 2274 | + ("val21a", "val22a"), |
| 2275 | + ], |
| 2276 | + "last_modified": "2023-06-05T03:54:07.000Z", |
| 2277 | + }, |
| 2278 | + "b.csv": { |
| 2279 | + "contents": [ |
| 2280 | + ("col3",), |
| 2281 | + ("val13b",), |
| 2282 | + ("val23b",), |
| 2283 | + ], |
| 2284 | + "last_modified": "2023-06-05T03:54:07.000Z", |
| 2285 | + }, |
| 2286 | + } |
| 2287 | + ) |
| 2288 | + .set_file_type("csv") |
| 2289 | + ) |
| 2290 | + .set_catalog( |
| 2291 | + CatalogBuilder() |
| 2292 | + .with_stream("stream1", SyncMode.full_refresh) |
| 2293 | + .with_stream("stream2", SyncMode.full_refresh) |
| 2294 | + .build() |
| 2295 | + ) |
| 2296 | + .set_expected_catalog( |
| 2297 | + { |
| 2298 | + "streams": [ |
| 2299 | + { |
| 2300 | + "json_schema": { |
| 2301 | + "type": "object", |
| 2302 | + "properties": { |
| 2303 | + "data": {"type": "object"}, |
| 2304 | + "_ab_source_file_last_modified": {"type": "string"}, |
| 2305 | + "_ab_source_file_url": {"type": "string"}, |
| 2306 | + }, |
| 2307 | + }, |
| 2308 | + "name": "stream1", |
| 2309 | + "supported_sync_modes": ["full_refresh", "incremental"], |
| 2310 | + "is_resumable": True, |
| 2311 | + "is_file_based": False, |
| 2312 | + "source_defined_cursor": True, |
| 2313 | + "default_cursor_field": ["_ab_source_file_last_modified"], |
| 2314 | + }, |
| 2315 | + { |
| 2316 | + "json_schema": { |
| 2317 | + "type": "object", |
| 2318 | + "properties": { |
| 2319 | + "col3": {"type": ["null", "string"]}, |
| 2320 | + "_ab_source_file_last_modified": {"type": "string"}, |
| 2321 | + "_ab_source_file_url": {"type": "string"}, |
| 2322 | + }, |
| 2323 | + }, |
| 2324 | + "name": "stream2", |
| 2325 | + "source_defined_cursor": True, |
| 2326 | + "default_cursor_field": ["_ab_source_file_last_modified"], |
| 2327 | + "supported_sync_modes": ["full_refresh", "incremental"], |
| 2328 | + "is_resumable": True, |
| 2329 | + "is_file_based": False, |
| 2330 | + }, |
| 2331 | + ] |
| 2332 | + } |
| 2333 | + ) |
| 2334 | + .set_expected_check_status("FAILED") |
| 2335 | + .set_expected_check_error( |
| 2336 | + None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2337 | + ) |
| 2338 | + .set_expected_discover_error( |
| 2339 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2340 | + ) |
| 2341 | + .set_expected_read_error( |
| 2342 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2343 | + ) |
| 2344 | +).build() |
| 2345 | + |
| 2346 | + |
| 2347 | +schemaless_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario: TestScenario[ |
| 2348 | + InMemoryFilesSource |
| 2349 | +] = ( |
| 2350 | + TestScenarioBuilder[InMemoryFilesSource]() |
| 2351 | + .set_name( |
| 2352 | + "schemaless_with_use_first_found_file_for_schema_discovery_fails_connection_check_multi_stream_scenario" |
| 2353 | + ) |
| 2354 | + .set_config( |
| 2355 | + { |
| 2356 | + "streams": [ |
| 2357 | + { |
| 2358 | + "name": "stream1", |
| 2359 | + "format": {"filetype": "csv"}, |
| 2360 | + "globs": ["a.csv"], |
| 2361 | + "validation_policy": "Skip Record", |
| 2362 | + "schemaless": True, |
| 2363 | + "use_first_found_file_for_schema_discovery": True, |
| 2364 | + }, |
| 2365 | + { |
| 2366 | + "name": "stream2", |
| 2367 | + "format": {"filetype": "csv"}, |
| 2368 | + "globs": ["b.csv"], |
| 2369 | + "validation_policy": "Skip Record", |
| 2370 | + }, |
| 2371 | + ] |
| 2372 | + } |
| 2373 | + ) |
| 2374 | + .set_source_builder( |
| 2375 | + FileBasedSourceBuilder() |
| 2376 | + .set_files( |
| 2377 | + { |
| 2378 | + "a.csv": { |
| 2379 | + "contents": [ |
| 2380 | + ("col1", "col2"), |
| 2381 | + ("val11a", "val12a"), |
| 2382 | + ("val21a", "val22a"), |
| 2383 | + ], |
| 2384 | + "last_modified": "2023-06-05T03:54:07.000Z", |
| 2385 | + }, |
| 2386 | + "b.csv": { |
| 2387 | + "contents": [ |
| 2388 | + ("col3",), |
| 2389 | + ("val13b",), |
| 2390 | + ("val23b",), |
| 2391 | + ], |
| 2392 | + "last_modified": "2023-06-05T03:54:07.000Z", |
| 2393 | + }, |
| 2394 | + } |
| 2395 | + ) |
| 2396 | + .set_file_type("csv") |
| 2397 | + ) |
| 2398 | + .set_catalog( |
| 2399 | + CatalogBuilder() |
| 2400 | + .with_stream("stream1", SyncMode.full_refresh) |
| 2401 | + .with_stream("stream2", SyncMode.full_refresh) |
| 2402 | + .build() |
| 2403 | + ) |
| 2404 | + .set_expected_catalog( |
| 2405 | + { |
| 2406 | + "streams": [ |
| 2407 | + { |
| 2408 | + "json_schema": { |
| 2409 | + "type": "object", |
| 2410 | + "properties": { |
| 2411 | + "data": {"type": "object"}, |
| 2412 | + "_ab_source_file_last_modified": {"type": "string"}, |
| 2413 | + "_ab_source_file_url": {"type": "string"}, |
| 2414 | + }, |
| 2415 | + }, |
| 2416 | + "name": "stream1", |
| 2417 | + "supported_sync_modes": ["full_refresh", "incremental"], |
| 2418 | + "is_resumable": True, |
| 2419 | + "is_file_based": False, |
| 2420 | + "source_defined_cursor": True, |
| 2421 | + "default_cursor_field": ["_ab_source_file_last_modified"], |
| 2422 | + }, |
| 2423 | + { |
| 2424 | + "json_schema": { |
| 2425 | + "type": "object", |
| 2426 | + "properties": { |
| 2427 | + "col3": {"type": ["null", "string"]}, |
| 2428 | + "_ab_source_file_last_modified": {"type": "string"}, |
| 2429 | + "_ab_source_file_url": {"type": "string"}, |
| 2430 | + }, |
| 2431 | + }, |
| 2432 | + "name": "stream2", |
| 2433 | + "source_defined_cursor": True, |
| 2434 | + "default_cursor_field": ["_ab_source_file_last_modified"], |
| 2435 | + "supported_sync_modes": ["full_refresh", "incremental"], |
| 2436 | + "is_resumable": True, |
| 2437 | + "is_file_based": False, |
| 2438 | + }, |
| 2439 | + ] |
| 2440 | + } |
| 2441 | + ) |
| 2442 | + .set_expected_check_status("FAILED") |
| 2443 | + .set_expected_check_error( |
| 2444 | + None, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
| 2445 | + ) |
2221 | 2446 | .set_expected_discover_error( |
2222 | | - ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value |
| 2447 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
2223 | 2448 | ) |
2224 | 2449 | .set_expected_read_error( |
2225 | | - ConfigValidationError, FileBasedSourceError.CONFIG_VALIDATION_ERROR.value |
| 2450 | + ConfigValidationError, FileBasedSourceError.ERROR_VALIDATION_STREAM_DISCOVERY_OPTIONS.value |
2226 | 2451 | ) |
2227 | 2452 | ).build() |
2228 | 2453 |
|
|
0 commit comments