From 86eb888e19c4a42441584ad0f2a8ce847c99eeb3 Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 27 Feb 2026 15:54:07 +0200 Subject: [PATCH 01/14] event listener action --- workflows/receive-event.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 workflows/receive-event.yml diff --git a/workflows/receive-event.yml b/workflows/receive-event.yml new file mode 100644 index 0000000..52f7999 --- /dev/null +++ b/workflows/receive-event.yml @@ -0,0 +1,25 @@ +name: GitBook Event Receiver + +on: + repository_dispatch: + types: [gitbook_update] # This must match the event_type from Repo A + +jobs: + process-gitbook-update: + runs-on: ubuntu-latest + steps: + - name: Acknowledge Event + run: | + echo "Event Received!" + echo "Source SHA: ${{ github.event.client_payload.sha }}" + echo "Author: ${{ github.event.client_payload.author }}" + echo "Message: ${{ github.event.client_payload.message }}" + + - name: Log the Changes + run: | + echo "--- START OF DIFF ---" + echo "${{ github.event.client_payload.diff }}" + echo "--- END OF DIFF ---" + + # Optional: You can add steps here to trigger a build, + # update a local file, or send a Slack notification. 
\ No newline at end of file From 035ea02fab98a09aca271b491705398bbfba8b6b Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 27 Feb 2026 15:56:57 +0200 Subject: [PATCH 02/14] event listener --- .../receive-event.yml => .github/workflows/event-listener.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename workflows/receive-event.yml => .github/workflows/event-listener.yml (100%) diff --git a/workflows/receive-event.yml b/.github/workflows/event-listener.yml similarity index 100% rename from workflows/receive-event.yml rename to .github/workflows/event-listener.yml From dc7cb3831ae33b48cbd9dded2a92fff92d2f509e Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Thu, 5 Mar 2026 13:46:04 +0200 Subject: [PATCH 03/14] event receiver get paths: --- .github/workflows/event-listener.yml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/event-listener.yml b/.github/workflows/event-listener.yml index 52f7999..1134be3 100644 --- a/.github/workflows/event-listener.yml +++ b/.github/workflows/event-listener.yml @@ -2,7 +2,7 @@ name: GitBook Event Receiver on: repository_dispatch: - types: [gitbook_update] # This must match the event_type from Repo A + types: [gitbook_update] jobs: process-gitbook-update: @@ -10,16 +10,25 @@ jobs: steps: - name: Acknowledge Event run: | - echo "Event Received!" - echo "Source SHA: ${{ github.event.client_payload.sha }}" + echo "🚀 Event Received from GitBook!" 
+ echo "Commit: ${{ github.event.client_payload.sha }}" echo "Author: ${{ github.event.client_payload.author }}" - echo "Message: ${{ github.event.client_payload.message }}" + echo "Summary: ${{ github.event.client_payload.message }}" - - name: Log the Changes + - name: Show Changed Paths + run: | + echo "The following filtered paths were modified:" + echo "${{ github.event.client_payload.changed_paths }}" + + - name: View Detailed Diff run: | echo "--- START OF DIFF ---" echo "${{ github.event.client_payload.diff }}" echo "--- END OF DIFF ---" - # Optional: You can add steps here to trigger a build, - # update a local file, or send a Slack notification. \ No newline at end of file + # EXAMPLE: Action based on specific path + - name: Conditional Logic + if: contains(github.event.client_payload.changed_paths, 'docs/api-reference') + run: | + echo "Detected change in API Reference. Triggering internal sync..." + # Insert your custom command here (e.g., npm run build-docs) \ No newline at end of file From 8d26ae778130565890f7ff0186fe5434426e05c9 Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 6 Mar 2026 13:22:44 +0200 Subject: [PATCH 04/14] Oxylabs Web Scraper SDK version 3.0.0 --- README.md | 107 +- src/oxylabs/_version.py | 2 +- src/oxylabs/internal/api.py | 9 +- src/oxylabs/internal/client.py | 120 ++- src/oxylabs/sources/amazon/amazon.py | 72 ++ src/oxylabs/sources/asian/__init__.py | 0 src/oxylabs/sources/asian/alibaba/__init__.py | 1 + src/oxylabs/sources/asian/alibaba/alibaba.py | 297 ++++++ .../sources/asian/aliexpress/__init__.py | 1 + .../sources/asian/aliexpress/aliexpress.py | 297 ++++++ src/oxylabs/sources/asian/avnet/__init__.py | 1 + src/oxylabs/sources/asian/avnet/avnet.py | 119 +++ .../sources/asian/flipkart/__init__.py | 1 + .../sources/asian/flipkart/flipkart.py | 297 ++++++ .../sources/asian/indiamart/__init__.py | 1 + .../sources/asian/indiamart/indiamart.py | 297 ++++++ src/oxylabs/sources/asian/lazada/__init__.py | 1 + 
src/oxylabs/sources/asian/lazada/lazada.py | 297 ++++++ src/oxylabs/sources/asian/rakuten/__init__.py | 1 + src/oxylabs/sources/asian/rakuten/rakuten.py | 204 ++++ src/oxylabs/sources/asian/shein/__init__.py | 1 + src/oxylabs/sources/asian/shein/shein.py | 127 +++ .../sources/asian/tokopedia/__init__.py | 1 + .../sources/asian/tokopedia/tokopedia.py | 204 ++++ src/oxylabs/sources/chatgpt/__init__.py | 1 + src/oxylabs/sources/chatgpt/chatgpt.py | 129 +++ src/oxylabs/sources/ebay/__init__.py | 1 + src/oxylabs/sources/ebay/ebay.py | 309 ++++++ src/oxylabs/sources/etsy/__init__.py | 1 + src/oxylabs/sources/etsy/etsy.py | 307 ++++++ src/oxylabs/sources/european/__init__.py | 0 .../sources/european/allegro/__init__.py | 1 + .../sources/european/allegro/allegro.py | 234 +++++ .../sources/european/cdiscount/__init__.py | 1 + .../sources/european/cdiscount/cdiscount.py | 297 ++++++ .../sources/european/idealo/__init__.py | 1 + src/oxylabs/sources/european/idealo/idealo.py | 113 +++ .../sources/european/mediamarkt/__init__.py | 1 + .../sources/european/mediamarkt/mediamarkt.py | 305 ++++++ src/oxylabs/sources/google/google.py | 256 +++++ .../google_shopping/google_shopping.py | 132 --- src/oxylabs/sources/kroger/kroger.py | 6 + .../sources/latin_american/__init__.py | 0 .../sources/latin_american/dcard/__init__.py | 1 + .../sources/latin_american/dcard/dcard.py | 113 +++ .../latin_american/falabella/__init__.py | 1 + .../latin_american/falabella/falabella.py | 297 ++++++ .../latin_american/magazineluiza/__init__.py | 1 + .../magazineluiza/magazineluiza.py | 297 ++++++ .../latin_american/mercadolibre/__init__.py | 1 + .../mercadolibre/mercadolibre.py | 297 ++++++ .../latin_american/mercadolivre/__init__.py | 1 + .../mercadolivre/mercadolivre.py | 204 ++++ .../sources/north_american/__init__.py | 0 .../bedbathandbeyond/__init__.py | 1 + .../bedbathandbeyond/bedbathandbeyond.py | 289 ++++++ .../north_american/bestbuy/__init__.py | 1 + 
.../sources/north_american/bestbuy/bestbuy.py | 254 +++++ .../north_american/bodegaaurrera/__init__.py | 1 + .../bodegaaurrera/bodegaaurrera.py | 295 ++++++ .../sources/north_american/costco/__init__.py | 1 + .../sources/north_american/costco/costco.py | 309 ++++++ .../north_american/grainger/__init__.py | 1 + .../north_american/grainger/grainger.py | 305 ++++++ .../north_american/instacart/__init__.py | 1 + .../north_american/instacart/instacart.py | 305 ++++++ .../sources/north_american/lowes/__init__.py | 1 + .../sources/north_american/lowes/lowes.py | 313 ++++++ .../north_american/menards/__init__.py | 1 + .../sources/north_american/menards/menards.py | 331 +++++++ .../sources/north_american/petco/__init__.py | 1 + .../sources/north_american/petco/petco.py | 224 +++++ .../sources/north_american/publix/__init__.py | 1 + .../sources/north_american/publix/publix.py | 307 ++++++ .../north_american/staples/__init__.py | 1 + .../sources/north_american/staples/staples.py | 127 +++ .../north_american/target_store/__init__.py | 1 + .../target_store/target_store.py | 452 +++++++++ .../north_american/walmart/__init__.py | 1 + .../sources/north_american/walmart/walmart.py | 379 ++++++++ src/oxylabs/sources/perplexity/__init__.py | 1 + src/oxylabs/sources/perplexity/perplexity.py | 113 +++ src/oxylabs/sources/real_estate/__init__.py | 0 .../sources/real_estate/airbnb/__init__.py | 1 + .../sources/real_estate/airbnb/airbnb.py | 198 ++++ .../sources/real_estate/zillow/__init__.py | 1 + .../sources/real_estate/zillow/zillow.py | 113 +++ src/oxylabs/sources/tiktok/__init__.py | 1 + src/oxylabs/sources/tiktok/tiktok.py | 289 ++++++ src/oxylabs/sources/universal/universal.py | 12 - src/oxylabs/sources/youtube/__init__.py | 1 + src/oxylabs/sources/youtube/youtube.py | 910 ++++++++++++++++++ src/oxylabs/utils/types/source.py | 138 ++- src/oxylabs/utils/utils.py | 4 +- tests/.DS_Store | Bin 0 -> 6148 bytes tests/sources/airbnb/__init__.py | 0 tests/sources/airbnb/test_airbnb.py | 27 
+ tests/sources/alibaba/__init__.py | 0 tests/sources/alibaba/test_alibaba.py | 48 + tests/sources/aliexpress/__init__.py | 0 tests/sources/aliexpress/test_aliexpress.py | 38 + tests/sources/allegro/__init__.py | 0 tests/sources/allegro/test_allegro.py | 37 + tests/sources/amazon/test_amazon.py | 149 +++ tests/sources/avnet/__init__.py | 0 tests/sources/avnet/test_avnet.py | 27 + tests/sources/bedbathandbeyond/__init__.py | 0 .../bedbathandbeyond/test_bedbathandbeyond.py | 41 + tests/sources/bestbuy/__init__.py | 0 tests/sources/bestbuy/test_bestbuy.py | 53 + tests/sources/bodegaaurrera/__init__.py | 0 .../bodegaaurrera/test_bodegaaurrera.py | 49 + tests/sources/cdiscount/__init__.py | 0 tests/sources/cdiscount/test_cdiscount.py | 38 + tests/sources/chatgpt/__init__.py | 0 tests/sources/chatgpt/test_chatgpt.py | 47 + tests/sources/costco/__init__.py | 0 tests/sources/costco/test_costco.py | 48 + tests/sources/dcard/__init__.py | 0 tests/sources/dcard/test_dcard.py | 27 + tests/sources/ebay/__init__.py | 0 tests/sources/ebay/test_ebay.py | 87 ++ tests/sources/etsy/__init__.py | 0 tests/sources/etsy/test_etsy.py | 67 ++ tests/sources/falabella/__init__.py | 0 tests/sources/falabella/test_falabella.py | 38 + tests/sources/flipkart/__init__.py | 0 tests/sources/flipkart/test_flipkart.py | 38 + tests/sources/google/__init__.py | 0 tests/sources/google/test_google.py | 106 ++ tests/sources/google_shopping/__init__.py | 0 .../google_shopping/test_google_shopping.py | 96 ++ tests/sources/grainger/__init__.py | 0 tests/sources/grainger/test_grainger.py | 48 + tests/sources/idealo/__init__.py | 0 tests/sources/idealo/test_idealo.py | 15 + tests/sources/indiamart/__init__.py | 0 tests/sources/indiamart/test_indiamart.py | 38 + tests/sources/instacart/__init__.py | 0 tests/sources/instacart/test_instacart.py | 38 + tests/sources/kroger/__init__.py | 0 tests/sources/kroger/test_kroger.py | 68 ++ tests/sources/lazada/__init__.py | 0 tests/sources/lazada/test_lazada.py | 38 + 
tests/sources/lowes/__init__.py | 0 tests/sources/lowes/test_lowes.py | 50 + tests/sources/magazineluiza/__init__.py | 0 .../magazineluiza/test_magazineluiza.py | 38 + tests/sources/mediamarkt/__init__.py | 0 tests/sources/mediamarkt/test_mediamarkt.py | 48 + tests/sources/menards/__init__.py | 0 tests/sources/menards/test_menards.py | 50 + tests/sources/mercadolibre/__init__.py | 0 .../sources/mercadolibre/test_mercadolibre.py | 38 + tests/sources/mercadolivre/__init__.py | 0 .../sources/mercadolivre/test_mercadolivre.py | 26 + tests/sources/perplexity/__init__.py | 0 tests/sources/perplexity/test_perplexity.py | 37 + tests/sources/petco/__init__.py | 0 tests/sources/petco/test_petco.py | 47 + tests/sources/publix/__init__.py | 0 tests/sources/publix/test_publix.py | 48 + tests/sources/rakuten/__init__.py | 0 tests/sources/rakuten/test_rakuten.py | 26 + tests/sources/shein/__init__.py | 0 tests/sources/shein/test_shein.py | 27 + tests/sources/staples/__init__.py | 0 tests/sources/staples/test_staples.py | 37 + tests/sources/target_store/__init__.py | 0 .../sources/target_store/test_target_store.py | 63 ++ tests/sources/tiktok/__init__.py | 0 tests/sources/tiktok/test_tiktok.py | 77 ++ tests/sources/tokopedia/__init__.py | 0 tests/sources/tokopedia/test_tokopedia.py | 26 + tests/sources/universal/__init__.py | 0 tests/sources/universal/test_universal.py | 57 ++ tests/sources/walmart/__init__.py | 0 tests/sources/walmart/test_walmart.py | 90 ++ tests/sources/youtube/__init__.py | 0 tests/sources/youtube/test_youtube.py | 246 +++++ tests/sources/zillow/__init__.py | 0 tests/sources/zillow/test_zillow.py | 15 + 182 files changed, 13609 insertions(+), 194 deletions(-) create mode 100644 src/oxylabs/sources/asian/__init__.py create mode 100644 src/oxylabs/sources/asian/alibaba/__init__.py create mode 100644 src/oxylabs/sources/asian/alibaba/alibaba.py create mode 100644 src/oxylabs/sources/asian/aliexpress/__init__.py create mode 100644 
src/oxylabs/sources/asian/aliexpress/aliexpress.py create mode 100644 src/oxylabs/sources/asian/avnet/__init__.py create mode 100644 src/oxylabs/sources/asian/avnet/avnet.py create mode 100644 src/oxylabs/sources/asian/flipkart/__init__.py create mode 100644 src/oxylabs/sources/asian/flipkart/flipkart.py create mode 100644 src/oxylabs/sources/asian/indiamart/__init__.py create mode 100644 src/oxylabs/sources/asian/indiamart/indiamart.py create mode 100644 src/oxylabs/sources/asian/lazada/__init__.py create mode 100644 src/oxylabs/sources/asian/lazada/lazada.py create mode 100644 src/oxylabs/sources/asian/rakuten/__init__.py create mode 100644 src/oxylabs/sources/asian/rakuten/rakuten.py create mode 100644 src/oxylabs/sources/asian/shein/__init__.py create mode 100644 src/oxylabs/sources/asian/shein/shein.py create mode 100644 src/oxylabs/sources/asian/tokopedia/__init__.py create mode 100644 src/oxylabs/sources/asian/tokopedia/tokopedia.py create mode 100644 src/oxylabs/sources/chatgpt/__init__.py create mode 100644 src/oxylabs/sources/chatgpt/chatgpt.py create mode 100644 src/oxylabs/sources/ebay/__init__.py create mode 100644 src/oxylabs/sources/ebay/ebay.py create mode 100644 src/oxylabs/sources/etsy/__init__.py create mode 100644 src/oxylabs/sources/etsy/etsy.py create mode 100644 src/oxylabs/sources/european/__init__.py create mode 100644 src/oxylabs/sources/european/allegro/__init__.py create mode 100644 src/oxylabs/sources/european/allegro/allegro.py create mode 100644 src/oxylabs/sources/european/cdiscount/__init__.py create mode 100644 src/oxylabs/sources/european/cdiscount/cdiscount.py create mode 100644 src/oxylabs/sources/european/idealo/__init__.py create mode 100644 src/oxylabs/sources/european/idealo/idealo.py create mode 100644 src/oxylabs/sources/european/mediamarkt/__init__.py create mode 100644 src/oxylabs/sources/european/mediamarkt/mediamarkt.py create mode 100644 src/oxylabs/sources/latin_american/__init__.py create mode 100644 
src/oxylabs/sources/latin_american/dcard/__init__.py create mode 100644 src/oxylabs/sources/latin_american/dcard/dcard.py create mode 100644 src/oxylabs/sources/latin_american/falabella/__init__.py create mode 100644 src/oxylabs/sources/latin_american/falabella/falabella.py create mode 100644 src/oxylabs/sources/latin_american/magazineluiza/__init__.py create mode 100644 src/oxylabs/sources/latin_american/magazineluiza/magazineluiza.py create mode 100644 src/oxylabs/sources/latin_american/mercadolibre/__init__.py create mode 100644 src/oxylabs/sources/latin_american/mercadolibre/mercadolibre.py create mode 100644 src/oxylabs/sources/latin_american/mercadolivre/__init__.py create mode 100644 src/oxylabs/sources/latin_american/mercadolivre/mercadolivre.py create mode 100644 src/oxylabs/sources/north_american/__init__.py create mode 100644 src/oxylabs/sources/north_american/bedbathandbeyond/__init__.py create mode 100644 src/oxylabs/sources/north_american/bedbathandbeyond/bedbathandbeyond.py create mode 100644 src/oxylabs/sources/north_american/bestbuy/__init__.py create mode 100644 src/oxylabs/sources/north_american/bestbuy/bestbuy.py create mode 100644 src/oxylabs/sources/north_american/bodegaaurrera/__init__.py create mode 100644 src/oxylabs/sources/north_american/bodegaaurrera/bodegaaurrera.py create mode 100644 src/oxylabs/sources/north_american/costco/__init__.py create mode 100644 src/oxylabs/sources/north_american/costco/costco.py create mode 100644 src/oxylabs/sources/north_american/grainger/__init__.py create mode 100644 src/oxylabs/sources/north_american/grainger/grainger.py create mode 100644 src/oxylabs/sources/north_american/instacart/__init__.py create mode 100644 src/oxylabs/sources/north_american/instacart/instacart.py create mode 100644 src/oxylabs/sources/north_american/lowes/__init__.py create mode 100644 src/oxylabs/sources/north_american/lowes/lowes.py create mode 100644 src/oxylabs/sources/north_american/menards/__init__.py create mode 100644 
src/oxylabs/sources/north_american/menards/menards.py create mode 100644 src/oxylabs/sources/north_american/petco/__init__.py create mode 100644 src/oxylabs/sources/north_american/petco/petco.py create mode 100644 src/oxylabs/sources/north_american/publix/__init__.py create mode 100644 src/oxylabs/sources/north_american/publix/publix.py create mode 100644 src/oxylabs/sources/north_american/staples/__init__.py create mode 100644 src/oxylabs/sources/north_american/staples/staples.py create mode 100644 src/oxylabs/sources/north_american/target_store/__init__.py create mode 100644 src/oxylabs/sources/north_american/target_store/target_store.py create mode 100644 src/oxylabs/sources/north_american/walmart/__init__.py create mode 100644 src/oxylabs/sources/north_american/walmart/walmart.py create mode 100644 src/oxylabs/sources/perplexity/__init__.py create mode 100644 src/oxylabs/sources/perplexity/perplexity.py create mode 100644 src/oxylabs/sources/real_estate/__init__.py create mode 100644 src/oxylabs/sources/real_estate/airbnb/__init__.py create mode 100644 src/oxylabs/sources/real_estate/airbnb/airbnb.py create mode 100644 src/oxylabs/sources/real_estate/zillow/__init__.py create mode 100644 src/oxylabs/sources/real_estate/zillow/zillow.py create mode 100644 src/oxylabs/sources/tiktok/__init__.py create mode 100644 src/oxylabs/sources/tiktok/tiktok.py create mode 100644 src/oxylabs/sources/youtube/__init__.py create mode 100644 src/oxylabs/sources/youtube/youtube.py create mode 100644 tests/.DS_Store create mode 100644 tests/sources/airbnb/__init__.py create mode 100644 tests/sources/airbnb/test_airbnb.py create mode 100644 tests/sources/alibaba/__init__.py create mode 100644 tests/sources/alibaba/test_alibaba.py create mode 100644 tests/sources/aliexpress/__init__.py create mode 100644 tests/sources/aliexpress/test_aliexpress.py create mode 100644 tests/sources/allegro/__init__.py create mode 100644 tests/sources/allegro/test_allegro.py create mode 100644 
tests/sources/amazon/test_amazon.py create mode 100644 tests/sources/avnet/__init__.py create mode 100644 tests/sources/avnet/test_avnet.py create mode 100644 tests/sources/bedbathandbeyond/__init__.py create mode 100644 tests/sources/bedbathandbeyond/test_bedbathandbeyond.py create mode 100644 tests/sources/bestbuy/__init__.py create mode 100644 tests/sources/bestbuy/test_bestbuy.py create mode 100644 tests/sources/bodegaaurrera/__init__.py create mode 100644 tests/sources/bodegaaurrera/test_bodegaaurrera.py create mode 100644 tests/sources/cdiscount/__init__.py create mode 100644 tests/sources/cdiscount/test_cdiscount.py create mode 100644 tests/sources/chatgpt/__init__.py create mode 100644 tests/sources/chatgpt/test_chatgpt.py create mode 100644 tests/sources/costco/__init__.py create mode 100644 tests/sources/costco/test_costco.py create mode 100644 tests/sources/dcard/__init__.py create mode 100644 tests/sources/dcard/test_dcard.py create mode 100644 tests/sources/ebay/__init__.py create mode 100644 tests/sources/ebay/test_ebay.py create mode 100644 tests/sources/etsy/__init__.py create mode 100644 tests/sources/etsy/test_etsy.py create mode 100644 tests/sources/falabella/__init__.py create mode 100644 tests/sources/falabella/test_falabella.py create mode 100644 tests/sources/flipkart/__init__.py create mode 100644 tests/sources/flipkart/test_flipkart.py create mode 100644 tests/sources/google/__init__.py create mode 100644 tests/sources/google/test_google.py create mode 100644 tests/sources/google_shopping/__init__.py create mode 100644 tests/sources/google_shopping/test_google_shopping.py create mode 100644 tests/sources/grainger/__init__.py create mode 100644 tests/sources/grainger/test_grainger.py create mode 100644 tests/sources/idealo/__init__.py create mode 100644 tests/sources/idealo/test_idealo.py create mode 100644 tests/sources/indiamart/__init__.py create mode 100644 tests/sources/indiamart/test_indiamart.py create mode 100644 
tests/sources/instacart/__init__.py create mode 100644 tests/sources/instacart/test_instacart.py create mode 100644 tests/sources/kroger/__init__.py create mode 100644 tests/sources/kroger/test_kroger.py create mode 100644 tests/sources/lazada/__init__.py create mode 100644 tests/sources/lazada/test_lazada.py create mode 100644 tests/sources/lowes/__init__.py create mode 100644 tests/sources/lowes/test_lowes.py create mode 100644 tests/sources/magazineluiza/__init__.py create mode 100644 tests/sources/magazineluiza/test_magazineluiza.py create mode 100644 tests/sources/mediamarkt/__init__.py create mode 100644 tests/sources/mediamarkt/test_mediamarkt.py create mode 100644 tests/sources/menards/__init__.py create mode 100644 tests/sources/menards/test_menards.py create mode 100644 tests/sources/mercadolibre/__init__.py create mode 100644 tests/sources/mercadolibre/test_mercadolibre.py create mode 100644 tests/sources/mercadolivre/__init__.py create mode 100644 tests/sources/mercadolivre/test_mercadolivre.py create mode 100644 tests/sources/perplexity/__init__.py create mode 100644 tests/sources/perplexity/test_perplexity.py create mode 100644 tests/sources/petco/__init__.py create mode 100644 tests/sources/petco/test_petco.py create mode 100644 tests/sources/publix/__init__.py create mode 100644 tests/sources/publix/test_publix.py create mode 100644 tests/sources/rakuten/__init__.py create mode 100644 tests/sources/rakuten/test_rakuten.py create mode 100644 tests/sources/shein/__init__.py create mode 100644 tests/sources/shein/test_shein.py create mode 100644 tests/sources/staples/__init__.py create mode 100644 tests/sources/staples/test_staples.py create mode 100644 tests/sources/target_store/__init__.py create mode 100644 tests/sources/target_store/test_target_store.py create mode 100644 tests/sources/tiktok/__init__.py create mode 100644 tests/sources/tiktok/test_tiktok.py create mode 100644 tests/sources/tokopedia/__init__.py create mode 100644 
tests/sources/tokopedia/test_tokopedia.py create mode 100644 tests/sources/universal/__init__.py create mode 100644 tests/sources/universal/test_universal.py create mode 100644 tests/sources/walmart/__init__.py create mode 100644 tests/sources/walmart/test_walmart.py create mode 100644 tests/sources/youtube/__init__.py create mode 100644 tests/sources/youtube/test_youtube.py create mode 100644 tests/sources/zillow/__init__.py create mode 100644 tests/sources/zillow/test_zillow.py diff --git a/README.md b/README.md index f9b5954..952cd60 100644 --- a/README.md +++ b/README.md @@ -67,15 +67,15 @@ password = "password" # Initialize the Realtime client with your credentials. client = RealtimeClient(username, password) -# Use `bing_search` as a source to scrape Bing with nike as a query. -result = client.bing.scrape_search("nike") +# Use `google_search` as a source to scrape Google with nike as a query. +result = client.google.scrape_search("nike") print(result.raw) ``` ### Integration Methods -There are three integration methods for the Oxylabs SERP API, each exposed via +There are three integration methods for the Oxylabs Web Scraper API, each exposed via different packages: - Realtime (Sync) - `RealtimeClient(username, password)` @@ -87,35 +87,64 @@ and how this SDK uses them [here](#integration-methods-1). 
### Sources -The Oxylabs API scrapes according to the sources provided via the API: - -| Target | Sources -|------------------------| -------------- -| **Amazon** | `amazon`, `amazon_product`, `amazon_search`, `amazon_pricing`, `amazon_sellers`, `amazon_bestsellers`, `amazon_reviews`, `amazon_questions` -| **Google** | `google`, `google_search`, `google_ads`, `google_travel_hotels`, `google_suggest`,`google_trends_explore`,`google_lens` -| **Bing** | `bing`, `bing_search` -| **Kroger** | `kroger`, `kroger_product`, `kroger_search` -| **Wayfair** | `wayfair`, `wayfair_search` -| **Other Websites** | `universal` - -These are the equivalent targets and methods available for scraping in the Python SDK: - -| Target | Methods -|------------------------| -------------- -| **amazon** | `scrape_search`, `scrape_url`, `scrape_product`, `scrape_pricing`, `scrape_reviews`, `scrape_questions`, `scrape_bestsellers`, `scrape_sellers` -| **bing** | `scrape_search`, `scrape_url` -| **google** | `scrape_search`, `scrape_url`, `scrape_ads`, `scrape_suggestions`, `scrape_travel_hotels`, `scrape_images`, `scrape_trends_explore`, `scrape_lens` -| **kroger** | `scrape_product`, `scrape_search`, `scrape_url` -| **wayfair** | `scrape_search`, `scrape_url` -| **universal** | `scrape_url` +These are the targets and methods available for scraping in the Python SDK: + +| Target | Methods +|---------------------------| -------------- +| **amazon** | `scrape_search`, `scrape_url`, `scrape_product`, `scrape_pricing`, `scrape_reviews`, `scrape_questions`, `scrape_bestsellers`, `scrape_sellers` +| **bing** | `scrape_search`, `scrape_url` +| **google** | `scrape_search`, `scrape_url`, `scrape_ads`, `scrape_suggestions`, `scrape_travel_hotels`, `scrape_images`, `scrape_trends_explore`, `scrape_lens`, `scrape_ai_mode`, `scrape_news` +| **google_shopping** | `scrape_shopping_search`, `scrape_shopping_url`, `scrape_shopping_products` +| **ebay** | `scrape_search`, `scrape_product`, `scrape_url` +| 
**etsy** | `scrape_search`, `scrape_product`, `scrape_url` +| **youtube** | `scrape_search`, `scrape_search_max`, `scrape_metadata`, `scrape_channel`, `scrape_subtitles`, `scrape_transcript`, `scrape_video_trainability`, `scrape_autocomplete`, `scrape_download` (async only) +| **tiktok** | `scrape_shop_search`, `scrape_shop_product`, `scrape_shop_url` +| **chatgpt** | `scrape` +| **perplexity** | `scrape` +| **kroger** | `scrape_product`, `scrape_search`, `scrape_url` +| **walmart** | `scrape_search`, `scrape_product`, `scrape_url` +| **bestbuy** | `scrape_search`, `scrape_product` +| **target_store** | `scrape_search`, `scrape_product`, `scrape_category`, `scrape_url` +| **costco** | `scrape_search`, `scrape_product`, `scrape_url` +| **lowes** | `scrape_search`, `scrape_product`, `scrape_url` +| **menards** | `scrape_search`, `scrape_product`, `scrape_url` +| **instacart** | `scrape_search`, `scrape_product`, `scrape_url` +| **bedbathandbeyond** | `scrape_search`, `scrape_product`, `scrape_url` +| **petco** | `scrape_search`, `scrape_url` +| **grainger** | `scrape_search`, `scrape_product`, `scrape_url` +| **publix** | `scrape_search`, `scrape_product`, `scrape_url` +| **staples** | `scrape_search` +| **wayfair** | `scrape_search`, `scrape_url` +| **allegro** | `scrape_search`, `scrape_product` +| **cdiscount** | `scrape_search`, `scrape_product`, `scrape_url` +| **idealo** | `scrape_search` +| **mediamarkt** | `scrape_search`, `scrape_product`, `scrape_url` +| **alibaba** | `scrape_search`, `scrape_product`, `scrape_url` +| **aliexpress** | `scrape_search`, `scrape_product`, `scrape_url` +| **flipkart** | `scrape_search`, `scrape_product`, `scrape_url` +| **indiamart** | `scrape_search`, `scrape_product`, `scrape_url` +| **lazada** | `scrape_search`, `scrape_product`, `scrape_url` +| **rakuten** | `scrape_search`, `scrape_url` +| **tokopedia** | `scrape_search`, `scrape_url` +| **shein** | `scrape_search` +| **avnet** | `scrape_search` +| **dcard** | 
`scrape_search` +| **mercadolibre** | `scrape_search`, `scrape_product`, `scrape_url` +| **mercadolivre** | `scrape_search`, `scrape_product` +| **magazineluiza** | `scrape_search`, `scrape_product`, `scrape_url` +| **falabella** | `scrape_search`, `scrape_product`, `scrape_url` +| **bodegaaurrera** | `scrape_search`, `scrape_product`, `scrape_url` +| **airbnb** | `scrape_url`, `scrape_product` +| **zillow** | `scrape_url` +| **universal** | `scrape_url` In the SDK you'll just need to call the relevant method name from the client. -For example if you wish to scrape Bing search you can do it with the following code: +For example if you wish to scrape Google search you can do it with the following code: ```python client = RealtimeClient(username, password) -result = client.bing.scrape_search("football") +result = client.google.scrape_search("football") ``` ### Query Parameters @@ -129,7 +158,7 @@ specific query parameters, here is an example of how to do it: ```python client = RealtimeClient(username, password) -result = client.bing.scrape_search( +result = client.google.scrape_search( "football", start_page=1, pages=3, @@ -200,18 +229,18 @@ SDK supports [custom parsing](https://developers.oxylabs.io/scraper-apis/custom- you define your own parsing and data processing logic that is executed on a raw scraping result. ```python -# Use `bing_search` as a source to scrape Bing using custom parsing +# Use `google_search` as a source to scrape Google using custom parsing # instructions. 
client = RealtimeClient(username, password) -result = client.bing.scrape_url( - "https://www.bing.com/search?q=nike", +result = client.google.scrape_url( + "https://www.google.com/search?q=nike", parse=True, parsing_instructions={ "number_of_results": { "_fns": [ { "_fn": "xpath_one", - "_args": [".//span[@class='sb_count']/text()"], + "_args": [".//div[@id='result-stats']/text()"], } ] } @@ -267,7 +296,7 @@ response = client.amazon.scrape_search("headset", parse=True) for result in response.results: for item in result.content["results"]["organic"]: - print(f"{item["asin"]}: {item["title"]}") + print(f'{item["asin"]}: {item["title"]}') ``` ## Integration Methods @@ -305,24 +334,24 @@ async def main(): # Initialize the async client with your credentials. client = AsyncClient(username, password) - # 'timeout' specifies the maximum time (in seconds) to wait for the scraping + # 'job_completion_timeout' specifies the maximum time (in seconds) to wait for the scraping # job to complete. # It is applicable for both Realtime and Push-Pull integrations. # 'poll_interval' is used only in Push-Pull integrations to set the delay # (in seconds) # between consecutive status checks of the job. 
tasks = [ - client.bing.scrape_url( - "https://www.bing.com/search?q=adidas", + client.google.scrape_url( + "https://www.google.com/search?q=adidas", parse=True, - timeout=35, + job_completion_timeout=35, poll_interval=3, ), - client.bing.scrape_url( - "https://www.bing.com/search?q=puma", + client.google.scrape_url( + "https://www.google.com/search?q=puma", parse=True, timeout=45, - poll_interval=5, + job_completion_timeout=5, ), ] diff --git a/src/oxylabs/_version.py b/src/oxylabs/_version.py index 8c0d5d5..528787c 100644 --- a/src/oxylabs/_version.py +++ b/src/oxylabs/_version.py @@ -1 +1 @@ -__version__ = "2.0.0" +__version__ = "3.0.0" diff --git a/src/oxylabs/internal/api.py b/src/oxylabs/internal/api.py index dc864b8..4249e90 100644 --- a/src/oxylabs/internal/api.py +++ b/src/oxylabs/internal/api.py @@ -4,6 +4,7 @@ import aiohttp import asyncio from platform import python_version, architecture +from typing import Optional from oxylabs._version import __version__ from oxylabs.utils.defaults import ASYNC_BASE_URL, SYNC_BASE_URL from oxylabs.utils.utils import ensure_session, close_session @@ -68,7 +69,7 @@ def get_response(self, payload:dict, config:dict) -> dict: return self._get_http_response(payload, "POST", config) - def _get_http_response(self, payload: dict, method: str, config: dict) -> dict | None: + def _get_http_response(self, payload: dict, method: str, config: dict) -> Optional[dict]: """ Sends an HTTP request to the specified URL with the given payload and method. @@ -136,7 +137,7 @@ def __init__(self, api_credentials: APICredentials, **kwargs) -> None: self._session = None self._requests = 0 - async def get_response(self, payload: dict, config: dict) -> dict | None: + async def get_response(self, payload: dict, config: dict) -> Optional[dict]: """ Processes the payload asynchronously and fetches the response. 
@@ -175,7 +176,7 @@ async def _get_job_id( payload: dict, user_session: aiohttp.ClientSession, request_timeout: int, - ) -> str | None: + ) -> Optional[str]: try: async with user_session.post( self._base_url, @@ -232,7 +233,7 @@ async def _poll_job_status( async def _get_http_response( self, job_id: str, user_session: aiohttp.ClientSession - ) -> dict | None: + ) -> Optional[dict]: """ Retrieves the HTTP response for a given job ID. diff --git a/src/oxylabs/internal/client.py b/src/oxylabs/internal/client.py index ca14bc5..10a21a9 100644 --- a/src/oxylabs/internal/client.py +++ b/src/oxylabs/internal/client.py @@ -1,13 +1,51 @@ import logging from oxylabs.internal.api import APICredentials, RealtimeAPI, AsyncAPI +from oxylabs.sources.real_estate.airbnb import Airbnb, AirbnbAsync +from oxylabs.sources.real_estate.zillow import Zillow, ZillowAsync from oxylabs.sources.amazon import Amazon, AmazonAsync +from oxylabs.sources.asian.alibaba import Alibaba, AlibabaAsync +from oxylabs.sources.asian.aliexpress import Aliexpress, AliexpressAsync +from oxylabs.sources.asian.avnet import Avnet, AvnetAsync +from oxylabs.sources.asian.flipkart import Flipkart, FlipkartAsync +from oxylabs.sources.asian.indiamart import Indiamart, IndiamartAsync +from oxylabs.sources.asian.lazada import Lazada, LazadaAsync +from oxylabs.sources.asian.rakuten import Rakuten, RakutenAsync +from oxylabs.sources.asian.tokopedia import Tokopedia, TokopediaAsync from oxylabs.sources.bing import Bing, BingAsync +from oxylabs.sources.north_american.bedbathandbeyond import Bedbathandbeyond, BedbathandbeyondAsync +from oxylabs.sources.north_american.bestbuy import Bestbuy, BestbuyAsync +from oxylabs.sources.north_american.bodegaaurrera import Bodegaaurrera, BodegaaurreraAsync +from oxylabs.sources.north_american.costco import Costco, CostcoAsync +from oxylabs.sources.north_american.grainger import Grainger, GraingerAsync +from oxylabs.sources.north_american.instacart import Instacart, InstacartAsync +from 
oxylabs.sources.north_american.lowes import Lowes, LowesAsync +from oxylabs.sources.north_american.menards import Menards, MenardsAsync +from oxylabs.sources.north_american.petco import Petco, PetcoAsync +from oxylabs.sources.north_american.publix import Publix, PublixAsync +from oxylabs.sources.north_american.target_store import TargetStore, TargetStoreAsync +from oxylabs.sources.european.allegro import Allegro, AllegroAsync +from oxylabs.sources.european.cdiscount import Cdiscount, CdiscountAsync +from oxylabs.sources.european.idealo import Idealo, IdealoAsync +from oxylabs.sources.european.mediamarkt import Mediamarkt, MediamarktAsync +from oxylabs.sources.chatgpt import Chatgpt, ChatgptAsync +from oxylabs.sources.latin_american.dcard import Dcard, DcardAsync +from oxylabs.sources.ebay import Ebay, EbayAsync +from oxylabs.sources.etsy import Etsy, EtsyAsync from oxylabs.sources.google import Google, GoogleAsync from oxylabs.sources.google_shopping import GoogleShopping, GoogleShoppingAsync from oxylabs.sources.kroger import Kroger, KrogerAsync +from oxylabs.sources.perplexity import Perplexity, PerplexityAsync +from oxylabs.sources.asian.shein import Shein, SheinAsync +from oxylabs.sources.north_american.staples import Staples, StaplesAsync +from oxylabs.sources.north_american.walmart import Walmart, WalmartAsync +from oxylabs.sources.latin_american.falabella import Falabella, FalabellaAsync +from oxylabs.sources.latin_american.mercadolibre import Mercadolibre, MercadolibreAsync +from oxylabs.sources.latin_american.mercadolivre import Mercadolivre, MercadolivreAsync +from oxylabs.sources.latin_american.magazineluiza import Magazineluiza, MagazineluizaAsync +from oxylabs.sources.tiktok import Tiktok, TiktokAsync from oxylabs.sources.universal import Universal, UniversalAsync from oxylabs.sources.wayfair import Wayfair, WayfairAsync -from oxylabs.sources.youtube_transcript import YoutubeTranscript, YoutubeTranscriptAsync +from oxylabs.sources.youtube import 
Youtube, YoutubeAsync # Configure logging logging.basicConfig(level=logging.INFO) @@ -23,14 +61,52 @@ def __init__(self, username: str, password: str, **kwargs) -> None: password (str): The password for API authentication. """ api = RealtimeAPI(APICredentials(username, password), **kwargs) + self.airbnb = Airbnb(api) + self.alibaba = Alibaba(api) + self.aliexpress = Aliexpress(api) + self.allegro = Allegro(api) self.amazon = Amazon(api) + self.avnet = Avnet(api) + self.bedbathandbeyond = Bedbathandbeyond(api) + self.bestbuy = Bestbuy(api) + self.bodegaaurrera = Bodegaaurrera(api) self.bing = Bing(api) + self.cdiscount = Cdiscount(api) + self.chatgpt = Chatgpt(api) + self.costco = Costco(api) + self.dcard = Dcard(api) + self.ebay = Ebay(api) + self.etsy = Etsy(api) + self.falabella = Falabella(api) + self.flipkart = Flipkart(api) + self.indiamart = Indiamart(api) + self.lazada = Lazada(api) + self.rakuten = Rakuten(api) + self.tokopedia = Tokopedia(api) self.google = Google(api) self.google_shopping = GoogleShopping(api) + self.grainger = Grainger(api) + self.idealo = Idealo(api) + self.instacart = Instacart(api) self.kroger = Kroger(api) + self.lowes = Lowes(api) + self.mediamarkt = Mediamarkt(api) + self.menards = Menards(api) + self.mercadolibre = Mercadolibre(api) + self.mercadolivre = Mercadolivre(api) + self.magazineluiza = Magazineluiza(api) + self.petco = Petco(api) + self.publix = Publix(api) + self.target_store = TargetStore(api) + self.perplexity = Perplexity(api) + self.shein = Shein(api) + self.staples = Staples(api) + self.tiktok = Tiktok(api) self.universal = Universal(api) self.wayfair = Wayfair(api) - self.youtube_transcript = YoutubeTranscript(api) + self.walmart = Walmart(api) + self.youtube = Youtube(api) + self.zillow = Zillow(api) class AsyncClient: def __init__(self, username: str, password: str, **kwargs) -> None: @@ -42,13 +118,51 @@ def __init__(self, username: str, password: str, **kwargs) -> None: password (str): The password for API 
authentication. """ api = AsyncAPI(APICredentials(username, password), **kwargs) + self.airbnb = AirbnbAsync(api) + self.alibaba = AlibabaAsync(api) + self.aliexpress = AliexpressAsync(api) + self.allegro = AllegroAsync(api) self.amazon = AmazonAsync(api) + self.avnet = AvnetAsync(api) + self.bedbathandbeyond = BedbathandbeyondAsync(api) + self.bestbuy = BestbuyAsync(api) + self.bodegaaurrera = BodegaaurreraAsync(api) self.bing = BingAsync(api) + self.cdiscount = CdiscountAsync(api) + self.chatgpt = ChatgptAsync(api) + self.costco = CostcoAsync(api) + self.dcard = DcardAsync(api) + self.ebay = EbayAsync(api) + self.etsy = EtsyAsync(api) + self.falabella = FalabellaAsync(api) + self.flipkart = FlipkartAsync(api) + self.indiamart = IndiamartAsync(api) + self.lazada = LazadaAsync(api) + self.rakuten = RakutenAsync(api) + self.tokopedia = TokopediaAsync(api) self.google = GoogleAsync(api) self.google_shopping = GoogleShoppingAsync(api) + self.grainger = GraingerAsync(api) + self.idealo = IdealoAsync(api) + self.instacart = InstacartAsync(api) self.kroger = KrogerAsync(api) + self.lowes = LowesAsync(api) + self.mediamarkt = MediamarktAsync(api) + self.menards = MenardsAsync(api) + self.mercadolibre = MercadolibreAsync(api) + self.mercadolivre = MercadolivreAsync(api) + self.magazineluiza = MagazineluizaAsync(api) + self.petco = PetcoAsync(api) + self.publix = PublixAsync(api) + self.target_store = TargetStoreAsync(api) + self.perplexity = PerplexityAsync(api) + self.shein = SheinAsync(api) + self.staples = StaplesAsync(api) + self.tiktok = TiktokAsync(api) self.universal = UniversalAsync(api) self.wayfair = WayfairAsync(api) - self.youtube_transcript = YoutubeTranscriptAsync(api) + self.walmart = WalmartAsync(api) + self.youtube = YoutubeAsync(api) + self.zillow = ZillowAsync(api) diff --git a/src/oxylabs/sources/amazon/amazon.py b/src/oxylabs/sources/amazon/amazon.py index 37bf112..5fcf81e 100644 --- a/src/oxylabs/sources/amazon/amazon.py +++ 
b/src/oxylabs/sources/amazon/amazon.py @@ -23,8 +23,11 @@ def scrape_search( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, start_page: Optional[int] = None, pages: Optional[int] = None, + sort_by: Optional[str] = None, + refinements: Optional[str] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, @@ -41,8 +44,11 @@ def scrape_search( Args: query (str): The search query. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. start_page (Optional[int]): The starting page number. pages (Optional[int]): The number of pages to scrape. + sort_by (Optional[str]): The sorting parameter. + refinements (Optional[str]): The refinements parameter. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. @@ -63,8 +69,11 @@ def scrape_search( "source": source.AMAZON_SEARCH, "query": query, "domain": domain, + "locale": locale, "start_page": start_page, "pages": pages, + "sort_by": sort_by, + "refinements": refinements, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, @@ -81,9 +90,12 @@ def scrape_search( def scrape_url( self, url: str, + geo_location: Optional[str] = None, + locale: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, callback_url: Optional[str] = None, + context: Optional[list] = None, parse: Optional[bool] = None, parsing_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -95,9 +107,12 @@ def scrape_url( Args: url (str): The URL to scrape. domain (Optional[str]): The domain to limit the search results to. + geo_location (Optional[str]): The Deliver to location. + locale (Optional[str]): The locale of the results. user_agent_type (Optional[str]): Device type and browser. 
callback_url (Optional[str]): URL to your callback endpoint. render (Optional[str]): Enables JavaScript rendering. + context (Optional[list]): Context parameters. parse (Optional[bool]): true will return structured data. parsing_instructions (Optional[dict]): Instructions for parsing the results. request_timeout (int | 165, optional): The interval in seconds for @@ -112,9 +127,12 @@ def scrape_url( payload = { "source": source.AMAZON_URL, "url": url, + "geo_location": geo_location, + "locale": locale, "user_agent_type": user_agent_type, "render": render, "callback_url": callback_url, + "context": context, "parse": parse, "parsing_instructions": parsing_instructions, **kwargs, @@ -127,6 +145,7 @@ def scrape_product( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, @@ -143,6 +162,7 @@ def scrape_product( Args: query (str): 10-symbol ASIN code. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. 
@@ -163,6 +183,7 @@ def scrape_product( "source": source.AMAZON_PRODUCT, "query": query, "domain": domain, + "locale": locale, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, @@ -180,12 +201,14 @@ def scrape_pricing( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, start_page: Optional[int] = None, pages: Optional[int] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, callback_url: Optional[str] = None, + context: Optional[list] = None, parse: Optional[bool] = None, parsing_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -197,12 +220,14 @@ def scrape_pricing( Args: query (str): 10-symbol ASIN code. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. start_page (Optional[int]): The starting page number. pages (Optional[int]): The number of pages to scrape. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. render (Optional[str]): Enables JavaScript rendering. + context (Optional[list]): Context parameters. parse (Optional[bool]): true will return structured data. parsing_instructions (Optional[dict]): Instructions for parsing the results. 
request_timeout (int | 165, optional): The interval in seconds for @@ -218,12 +243,14 @@ def scrape_pricing( "source": source.AMAZON_PRICING, "query": query, "domain": domain, + "locale": locale, "start_page": start_page, "pages": pages, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, "callback_url": callback_url, + "context": context, "parse": parse, "parsing_instructions": parsing_instructions, **kwargs, @@ -342,12 +369,14 @@ def scrape_bestsellers( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, start_page: Optional[int] = None, pages: Optional[int] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, callback_url: Optional[str] = None, + context: Optional[list] = None, parse: Optional[bool] = None, parsing_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -359,12 +388,14 @@ def scrape_bestsellers( Args: query (str): Browse node ID. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. start_page (Optional[int]): The starting page number. pages (Optional[int]): The number of pages to scrape. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. render (Optional[str]): Enables JavaScript rendering. + context (Optional[list]): Context parameters. parse (Optional[bool]): true will return structured data. parsing_instructions (Optional[dict]): Instructions for parsing the results. 
request_timeout (int | 165, optional): The interval in seconds for @@ -380,12 +411,14 @@ def scrape_bestsellers( "source": source.AMAZON_BEST_SELLERS, "query": query, "domain": domain, + "locale": locale, "start_page": start_page, "pages": pages, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, "callback_url": callback_url, + "context": context, "parse": parse, "parsing_instructions": parsing_instructions, **kwargs, @@ -398,6 +431,7 @@ def scrape_sellers( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, @@ -413,6 +447,7 @@ def scrape_sellers( Args: query (str): 13-character seller ID. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. @@ -432,6 +467,7 @@ def scrape_sellers( "source": source.AMAZON_SELLERS, "query": query, "domain": domain, + "locale": locale, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, @@ -459,8 +495,11 @@ async def scrape_search( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, start_page: Optional[int] = None, pages: Optional[int] = None, + sort_by: Optional[str] = None, + refinements: Optional[str] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, @@ -479,8 +518,11 @@ async def scrape_search( Args: query (str): The search query. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. start_page (Optional[int]): The starting page number. pages (Optional[int]): The number of pages to scrape. + sort_by (Optional[str]): The sorting parameter. 
+ refinements (Optional[str]): The refinements parameter. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. @@ -509,8 +551,11 @@ async def scrape_search( "source": source.AMAZON_SEARCH, "query": query, "domain": domain, + "locale": locale, "start_page": start_page, "pages": pages, + "sort_by": sort_by, + "refinements": refinements, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, @@ -527,9 +572,12 @@ async def scrape_search( async def scrape_url( self, url: str, + geo_location: Optional[str] = None, + locale: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, callback_url: Optional[str] = None, + context: Optional[list] = None, parse: Optional[bool] = None, parsing_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -543,9 +591,12 @@ async def scrape_url( Args: url (str): The URL to scrape. domain (Optional[str]): The domain to limit the search results to. + geo_location (Optional[str]): The Deliver to location. + locale (Optional[str]): The locale of the results. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. render (Optional[str]): Enables JavaScript rendering. + context (Optional[list]): Context parameters. parse (Optional[bool]): true will return structured data. parsing_instructions (Optional[dict]): Instructions for parsing the results. 
request_timeout (int | 165, optional): The interval in seconds for @@ -569,9 +620,12 @@ async def scrape_url( payload = { "source": source.AMAZON_URL, "url": url, + "geo_location": geo_location, + "locale": locale, "user_agent_type": user_agent_type, "render": render, "callback_url": callback_url, + "context": context, "parse": parse, "parsing_instructions": parsing_instructions, **kwargs, @@ -584,6 +638,7 @@ async def scrape_product( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, @@ -602,6 +657,7 @@ async def scrape_product( Args: query (str): 10-symbol ASIN code. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. @@ -631,6 +687,7 @@ async def scrape_product( "source": source.AMAZON_PRODUCT, "query": query, "domain": domain, + "locale": locale, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, @@ -648,12 +705,14 @@ async def scrape_pricing( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, start_page: Optional[int] = None, pages: Optional[int] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, callback_url: Optional[str] = None, + context: Optional[list] = None, parse: Optional[bool] = None, parsing_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -667,12 +726,14 @@ async def scrape_pricing( Args: query (str): 10-symbol ASIN code. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. start_page (Optional[int]): The starting page number. 
pages (Optional[int]): The number of pages to scrape. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. render (Optional[str]): Enables JavaScript rendering. + context (Optional[list]): Context parameters. parse (Optional[bool]): true will return structured data. parsing_instructions (Optional[dict]): Instructions for parsing the results. request_timeout (int | 165, optional): The interval in seconds for @@ -697,12 +758,14 @@ async def scrape_pricing( "source": source.AMAZON_PRICING, "query": query, "domain": domain, + "locale": locale, "start_page": start_page, "pages": pages, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, "callback_url": callback_url, + "context": context, "parse": parse, "parsing_instructions": parsing_instructions, **kwargs, @@ -843,12 +906,14 @@ async def scrape_bestsellers( self, query, domain: Optional[str] = None, + locale: Optional[str] = None, start_page: Optional[int] = None, pages: Optional[int] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, callback_url: Optional[str] = None, + context: Optional[list] = None, parse: Optional[bool] = None, parsing_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -862,12 +927,14 @@ async def scrape_bestsellers( Args: query (str): Browse node ID. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. start_page (Optional[int]): The starting page number. pages (Optional[int]): The number of pages to scrape. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. render (Optional[str]): Enables JavaScript rendering. + context (Optional[list]): Context parameters. 
parse (Optional[bool]): true will return structured data. parsing_instructions (Optional[dict]): Instructions for parsing the results. request_timeout (int | 165, optional): The interval in seconds for @@ -892,12 +959,14 @@ async def scrape_bestsellers( "source": source.AMAZON_BEST_SELLERS, "query": query, "domain": domain, + "locale": locale, "start_page": start_page, "pages": pages, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, "callback_url": callback_url, + "context": context, "parse": parse, "parsing_instructions": parsing_instructions, **kwargs, @@ -910,6 +979,7 @@ async def scrape_sellers( self, query: str, domain: Optional[str] = None, + locale: Optional[str] = None, geo_location: Optional[str] = None, user_agent_type: Optional[str] = None, render: Optional[str] = None, @@ -927,6 +997,7 @@ async def scrape_sellers( Args: query (str): 13-character seller ID. domain (Optional[str]): The domain to limit the search results to. + locale (Optional[str]): The locale of the results. geo_location (Optional[str]): The Deliver to location. user_agent_type (Optional[str]): Device type and browser. callback_url (Optional[str]): URL to your callback endpoint. 
@@ -955,6 +1026,7 @@ async def scrape_sellers( "source": source.AMAZON_SELLERS, "query": query, "domain": domain, + "locale": locale, "geo_location": geo_location, "user_agent_type": user_agent_type, "render": render, diff --git a/src/oxylabs/sources/asian/__init__.py b/src/oxylabs/sources/asian/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/oxylabs/sources/asian/alibaba/__init__.py b/src/oxylabs/sources/asian/alibaba/__init__.py new file mode 100644 index 0000000..e748bf9 --- /dev/null +++ b/src/oxylabs/sources/asian/alibaba/__init__.py @@ -0,0 +1 @@ +from .alibaba import Alibaba, AlibabaAsync diff --git a/src/oxylabs/sources/asian/alibaba/alibaba.py b/src/oxylabs/sources/asian/alibaba/alibaba.py new file mode 100644 index 0000000..5690df7 --- /dev/null +++ b/src/oxylabs/sources/asian/alibaba/alibaba.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Alibaba: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Alibaba class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Alibaba search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. 
+ callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALIBABA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an Alibaba product page for a given product ID. + + Args: + product_id (str): 13-digit Alibaba product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALIBABA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from an Alibaba URL. + + Args: + url (str): Direct URL to any Alibaba page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALIBABA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class AlibabaAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the AlibabaAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Alibaba search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALIBABA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an Alibaba product page for a given product ID. + + Args: + product_id (str): 13-digit Alibaba product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALIBABA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from an Alibaba URL. + + Args: + url (str): Direct URL to any Alibaba page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALIBABA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/aliexpress/__init__.py b/src/oxylabs/sources/asian/aliexpress/__init__.py new file mode 100644 index 0000000..25c3cec --- /dev/null +++ b/src/oxylabs/sources/asian/aliexpress/__init__.py @@ -0,0 +1 @@ +from .aliexpress import Aliexpress, AliexpressAsync diff --git a/src/oxylabs/sources/asian/aliexpress/aliexpress.py b/src/oxylabs/sources/asian/aliexpress/aliexpress.py new file mode 100644 index 0000000..35efd8b --- /dev/null +++ b/src/oxylabs/sources/asian/aliexpress/aliexpress.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Aliexpress: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Aliexpress class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes AliExpress search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". 
+ start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALIEXPRESS_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an AliExpress product page for a given product ID. + + Args: + product_id (str): AliExpress product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALIEXPRESS_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from an AliExpress URL. + + Args: + url (str): Direct URL to any AliExpress page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALIEXPRESS_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class AliexpressAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the AliexpressAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes AliExpress search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALIEXPRESS_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an AliExpress product page for a given product ID. + + Args: + product_id (str): AliExpress product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALIEXPRESS_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from an AliExpress URL. + + Args: + url (str): Direct URL to any AliExpress page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALIEXPRESS_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/avnet/__init__.py b/src/oxylabs/sources/asian/avnet/__init__.py new file mode 100644 index 0000000..aafed99 --- /dev/null +++ b/src/oxylabs/sources/asian/avnet/__init__.py @@ -0,0 +1 @@ +from .avnet import Avnet, AvnetAsync diff --git a/src/oxylabs/sources/asian/avnet/avnet.py b/src/oxylabs/sources/asian/avnet/avnet.py new file mode 100644 index 0000000..c8e6032 --- /dev/null +++ b/src/oxylabs/sources/asian/avnet/avnet.py @@ -0,0 +1,119 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Avnet: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Avnet class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Avnet search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. 
+ callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.AVNET_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class AvnetAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the AvnetAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Avnet search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. 
+ + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.AVNET_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/flipkart/__init__.py b/src/oxylabs/sources/asian/flipkart/__init__.py new file mode 100644 index 0000000..b43726f --- /dev/null +++ b/src/oxylabs/sources/asian/flipkart/__init__.py @@ -0,0 +1 @@ +from .flipkart import Flipkart, FlipkartAsync diff --git a/src/oxylabs/sources/asian/flipkart/flipkart.py b/src/oxylabs/sources/asian/flipkart/flipkart.py new file mode 100644 index 0000000..ab47544 --- /dev/null +++ b/src/oxylabs/sources/asian/flipkart/flipkart.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Flipkart: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Flipkart class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Flipkart search results for a given query. + + Args: + query (str): The keyword or phrase to search for. 
+ "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.FLIPKART_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Flipkart product page for a given product ID. + + Args: + product_id (str): Flipkart product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.FLIPKART_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Flipkart URL. + + Args: + url (str): Direct URL to any Flipkart page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.FLIPKART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class FlipkartAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the FlipkartAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Flipkart search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.FLIPKART_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Flipkart product page for a given product ID. + + Args: + product_id (str): Flipkart product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.FLIPKART_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Flipkart URL. + + Args: + url (str): Direct URL to any Flipkart page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.FLIPKART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/indiamart/__init__.py b/src/oxylabs/sources/asian/indiamart/__init__.py new file mode 100644 index 0000000..9fbd583 --- /dev/null +++ b/src/oxylabs/sources/asian/indiamart/__init__.py @@ -0,0 +1 @@ +from .indiamart import Indiamart, IndiamartAsync diff --git a/src/oxylabs/sources/asian/indiamart/indiamart.py b/src/oxylabs/sources/asian/indiamart/indiamart.py new file mode 100644 index 0000000..5dc65b6 --- /dev/null +++ b/src/oxylabs/sources/asian/indiamart/indiamart.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Indiamart: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Indiamart class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes IndiaMART search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. 
+ user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.INDIAMART_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an IndiaMART product page for a given product ID. + + Args: + product_id (str): IndiaMART product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.INDIAMART_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from an IndiaMART URL. + + Args: + url (str): Direct URL to any IndiaMART page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.INDIAMART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class IndiamartAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the IndiamartAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes IndiaMART search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.INDIAMART_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an IndiaMART product page for a given product ID. + + Args: + product_id (str): IndiaMART product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.INDIAMART_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from an IndiaMART URL. + + Args: + url (str): Direct URL to any IndiaMART page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.INDIAMART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/lazada/__init__.py b/src/oxylabs/sources/asian/lazada/__init__.py new file mode 100644 index 0000000..8920740 --- /dev/null +++ b/src/oxylabs/sources/asian/lazada/__init__.py @@ -0,0 +1 @@ +from .lazada import Lazada, LazadaAsync diff --git a/src/oxylabs/sources/asian/lazada/lazada.py b/src/oxylabs/sources/asian/lazada/lazada.py new file mode 100644 index 0000000..9072917 --- /dev/null +++ b/src/oxylabs/sources/asian/lazada/lazada.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Lazada: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Lazada class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Lazada search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. 
+            user_agent_type (Optional[str]): Device type and browser.
+            render (Optional[str]): Enables JavaScript rendering.
+            callback_url (Optional[str]): URL to your callback endpoint.
+            request_timeout (int | 165, optional): The interval in seconds for
+                the request to time out if no response is returned.
+                Defaults to 165.
+
+        Returns:
+            Response: The response from the server after the job is completed.
+        """
+
+        config = prepare_config(request_timeout=request_timeout)
+        payload = {
+            "source": source.LAZADA_SEARCH,
+            "query": query,
+            "start_page": start_page,
+            "user_agent_type": user_agent_type,
+            "render": render,
+            "callback_url": callback_url,
+            **kwargs,
+        }
+        api_response = self._api_instance.get_response(payload, config)
+        return Response(api_response)
+
+    def scrape_product(
+        self,
+        product_id: str,
+        user_agent_type: Optional[str] = None,
+        render: Optional[str] = None,
+        callback_url: Optional[str] = None,
+        request_timeout: Optional[int] = 165,
+        **kwargs
+    ) -> Response:
+        """
+        Scrapes a Lazada product page for a given product ID.
+
+        Args:
+            product_id (str): Lazada product ID.
+                This value is submitted as the
+                "product_id" field of the job payload.
+            user_agent_type (Optional[str]): Device type and browser.
+            render (Optional[str]): Enables JavaScript rendering.
+            callback_url (Optional[str]): URL to your callback endpoint.
+            request_timeout (int | 165, optional): The interval in seconds for
+                the request to time out if no response is returned.
+                Defaults to 165.
+
+        Returns:
+            Response: The response from the server after the job is completed.
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.LAZADA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Lazada URL. + + Args: + url (str): Direct URL to any Lazada page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.LAZADA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class LazadaAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the LazadaAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Lazada search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.LAZADA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Lazada product page for a given product ID. + + Args: + product_id (str): Lazada product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.LAZADA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Lazada URL. + + Args: + url (str): Direct URL to any Lazada page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.LAZADA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/rakuten/__init__.py b/src/oxylabs/sources/asian/rakuten/__init__.py new file mode 100644 index 0000000..84655cf --- /dev/null +++ b/src/oxylabs/sources/asian/rakuten/__init__.py @@ -0,0 +1 @@ +from .rakuten import Rakuten, RakutenAsync diff --git a/src/oxylabs/sources/asian/rakuten/rakuten.py b/src/oxylabs/sources/asian/rakuten/rakuten.py new file mode 100644 index 0000000..cf3a48d --- /dev/null +++ b/src/oxylabs/sources/asian/rakuten/rakuten.py @@ -0,0 +1,204 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Rakuten: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Rakuten class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Rakuten search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. 
+ callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.RAKUTEN_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Rakuten URL. + + Args: + url (str): Direct URL to any Rakuten page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.RAKUTEN_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class RakutenAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the RakutenAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Rakuten search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.RAKUTEN_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Rakuten URL. + + Args: + url (str): Direct URL to any Rakuten page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.RAKUTEN_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/shein/__init__.py b/src/oxylabs/sources/asian/shein/__init__.py new file mode 100644 index 0000000..9d3e9b9 --- /dev/null +++ b/src/oxylabs/sources/asian/shein/__init__.py @@ -0,0 +1 @@ +from .shein import Shein, SheinAsync diff --git a/src/oxylabs/sources/asian/shein/shein.py b/src/oxylabs/sources/asian/shein/shein.py new file mode 100644 index 0000000..c5fc402 --- /dev/null +++ b/src/oxylabs/sources/asian/shein/shein.py @@ -0,0 +1,127 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Shein: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Shein class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Shein search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "com", "com.mx", "co.uk". + start_page (Optional[int]): Starting page number. 
+ user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.SHEIN_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class SheinAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the SheinAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Shein search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "com", "com.mx", "co.uk". + start_page (Optional[int]): Starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. 
+ request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.SHEIN_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/asian/tokopedia/__init__.py b/src/oxylabs/sources/asian/tokopedia/__init__.py new file mode 100644 index 0000000..1df5590 --- /dev/null +++ b/src/oxylabs/sources/asian/tokopedia/__init__.py @@ -0,0 +1 @@ +from .tokopedia import Tokopedia, TokopediaAsync diff --git a/src/oxylabs/sources/asian/tokopedia/tokopedia.py b/src/oxylabs/sources/asian/tokopedia/tokopedia.py new file mode 100644 index 0000000..45eeb88 --- /dev/null +++ b/src/oxylabs/sources/asian/tokopedia/tokopedia.py @@ -0,0 +1,204 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Tokopedia: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Tokopedia class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Tokopedia search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TOKOPEDIA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Tokopedia URL. + + Args: + url (str): Direct URL to any Tokopedia page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TOKOPEDIA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class TokopediaAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the TokopediaAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Tokopedia search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TOKOPEDIA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Tokopedia URL. + + Args: + url (str): Direct URL to any Tokopedia page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TOKOPEDIA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/chatgpt/__init__.py b/src/oxylabs/sources/chatgpt/__init__.py new file mode 100644 index 0000000..7877830 --- /dev/null +++ b/src/oxylabs/sources/chatgpt/__init__.py @@ -0,0 +1 @@ +from .chatgpt import Chatgpt, ChatgptAsync diff --git a/src/oxylabs/sources/chatgpt/chatgpt.py b/src/oxylabs/sources/chatgpt/chatgpt.py new file mode 100644 index 0000000..f4edc98 --- /dev/null +++ b/src/oxylabs/sources/chatgpt/chatgpt.py @@ -0,0 +1,129 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Chatgpt: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Chatgpt class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape( + self, + prompt: str, + search: Optional[bool] = None, + geo_location: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes ChatGPT conversational responses for a given prompt. + + Args: + prompt (str): The prompt or question to submit to ChatGPT. + Must be less than 4000 symbols. + search (Optional[bool]): Triggers ChatGPT to perform a Web Search + for the prompt. Defaults to true. 
+ geo_location (Optional[str]): Specify a country to send the prompt from. + render (Optional[str]): JavaScript rendering is enforced by default for chatgpt. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.CHATGPT, + "prompt": prompt, + "search": search, + "geo_location": geo_location, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class ChatgptAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the ChatgptAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape( + self, + prompt: str, + search: Optional[bool] = None, + geo_location: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes ChatGPT conversational responses for a given prompt. + + Args: + prompt (str): The prompt or question to submit to ChatGPT. + Must be less than 4000 symbols. + search (Optional[bool]): Triggers ChatGPT to perform a Web Search + for the prompt. Defaults to true. + geo_location (Optional[str]): Specify a country to send the prompt from. + render (Optional[str]): JavaScript rendering is enforced by default for chatgpt. 
+ parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.CHATGPT, + "prompt": prompt, + "search": search, + "geo_location": geo_location, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/ebay/__init__.py b/src/oxylabs/sources/ebay/__init__.py new file mode 100644 index 0000000..3892fcc --- /dev/null +++ b/src/oxylabs/sources/ebay/__init__.py @@ -0,0 +1 @@ +from .ebay import Ebay, EbayAsync diff --git a/src/oxylabs/sources/ebay/ebay.py b/src/oxylabs/sources/ebay/ebay.py new file mode 100644 index 0000000..72d4f72 --- /dev/null +++ b/src/oxylabs/sources/ebay/ebay.py @@ -0,0 +1,309 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Ebay: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Ebay class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes eBay search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.EBAY_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an eBay product page for a given product ID. + + Args: + product_id (str): eBay product ID. + domain (Optional[str]): Domain localization. Accepted values: + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". 
+ user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.EBAY_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from an eBay URL. + + Args: + url (str): Direct URL to any eBay page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.EBAY_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class EbayAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the EbayAsync class. 
+ + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes eBay search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.EBAY_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an eBay product page for a given product ID. + + Args: + product_id (str): eBay product ID. + domain (Optional[str]): Domain localization. Accepted values: + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.EBAY_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from an eBay URL. + + Args: + url (str): Direct URL to any eBay page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.EBAY_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/etsy/__init__.py b/src/oxylabs/sources/etsy/__init__.py new file mode 100644 index 0000000..93cdd98 --- /dev/null +++ b/src/oxylabs/sources/etsy/__init__.py @@ -0,0 +1 @@ +from .etsy import Etsy, EtsyAsync diff --git a/src/oxylabs/sources/etsy/etsy.py b/src/oxylabs/sources/etsy/etsy.py new file mode 100644 index 0000000..2de8c90 --- /dev/null +++ b/src/oxylabs/sources/etsy/etsy.py @@ -0,0 +1,307 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Etsy: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Etsy class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[int] = None, + geo_location: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Etsy search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + store_id (Optional[int]): Specify a store. + geo_location (Optional[str]): Set the shipping to location. 
+ user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ETSY_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "geo_location": geo_location, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an Etsy product page for a given product ID. + + Args: + product_id (str): 10-symbol product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ETSY_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from an Etsy URL. + + Args: + url (str): Direct URL to any Etsy page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ETSY_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class EtsyAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the EtsyAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[int] = None, + geo_location: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Etsy search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + store_id (Optional[int]): Specify a store. + geo_location (Optional[str]): Set the shipping to location. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ETSY_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "geo_location": geo_location, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an Etsy product page for a given product ID. + + Args: + product_id (str): 10-symbol product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ETSY_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from an Etsy URL. + + Args: + url (str): Direct URL to any Etsy page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ETSY_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/european/__init__.py b/src/oxylabs/sources/european/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/oxylabs/sources/european/allegro/__init__.py b/src/oxylabs/sources/european/allegro/__init__.py new file mode 100644 index 0000000..d149f09 --- /dev/null +++ b/src/oxylabs/sources/european/allegro/__init__.py @@ -0,0 +1 @@ +from .allegro import Allegro, AllegroAsync diff --git a/src/oxylabs/sources/european/allegro/allegro.py b/src/oxylabs/sources/european/allegro/allegro.py new file mode 100644 index 0000000..9276b19 --- /dev/null +++ b/src/oxylabs/sources/european/allegro/allegro.py @@ -0,0 +1,234 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Allegro: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Allegro class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + delivery_time: Optional[str] = None, + shipping_from: Optional[str] = None, + store_city: Optional[str] = None, + store_region: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Allegro search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + delivery_time (Optional[str]): Delivery date filter. Accepted values: + "one_day", "two_days". + shipping_from (Optional[str]): Shipping-from location. Accepted values: + "china", "czech", "poland", "hungary", "slovakia", + "eu_countries", "allegro_warehouse". + store_city (Optional[str]): Shopping store's city. + store_region (Optional[str]): Shipping store's region. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALLEGRO_SEARCH, + "query": query, + "start_page": start_page, + "delivery_time": delivery_time, + "shipping_from": shipping_from, + "store_city": store_city, + "store_region": store_region, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an Allegro product page for a given product ID. + + Args: + product_id (str): 11-symbol Allegro product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ALLEGRO_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class AllegroAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the AllegroAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + delivery_time: Optional[str] = None, + shipping_from: Optional[str] = None, + store_city: Optional[str] = None, + store_region: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Allegro search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + delivery_time (Optional[str]): Delivery date filter. Accepted values: + "one_day", "two_days". + shipping_from (Optional[str]): Shipping-from location. Accepted values: + "china", "czech", "poland", "hungary", "slovakia", + "eu_countries", "allegro_warehouse". + store_city (Optional[str]): Shopping store's city. + store_region (Optional[str]): Shipping store's region. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALLEGRO_SEARCH, + "query": query, + "start_page": start_page, + "delivery_time": delivery_time, + "shipping_from": shipping_from, + "store_city": store_city, + "store_region": store_region, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an Allegro product page for a given product ID. + + Args: + product_id (str): 11-symbol Allegro product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ALLEGRO_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/european/cdiscount/__init__.py b/src/oxylabs/sources/european/cdiscount/__init__.py new file mode 100644 index 0000000..eb25050 --- /dev/null +++ b/src/oxylabs/sources/european/cdiscount/__init__.py @@ -0,0 +1 @@ +from .cdiscount import Cdiscount, CdiscountAsync diff --git a/src/oxylabs/sources/european/cdiscount/cdiscount.py b/src/oxylabs/sources/european/cdiscount/cdiscount.py new file mode 100644 index 0000000..b730086 --- /dev/null +++ b/src/oxylabs/sources/european/cdiscount/cdiscount.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Cdiscount: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Cdiscount class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Cdiscount search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". 
+ start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.CDISCOUNT_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Cdiscount product page for a given product ID. + + Args: + product_id (str): Cdiscount product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.CDISCOUNT_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Cdiscount URL. + + Args: + url (str): Direct URL to any Cdiscount page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.CDISCOUNT_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class CdiscountAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the CdiscountAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Cdiscount search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+        """
+
+        config = prepare_config(
+            request_timeout=request_timeout,
+            poll_interval=poll_interval,
+            job_completion_timeout=job_completion_timeout,
+            async_integration=True,
+        )
+        payload = {
+            "source": source.CDISCOUNT_SEARCH,
+            "query": query,
+            "start_page": start_page,
+            "user_agent_type": user_agent_type,
+            "render": render,
+            "callback_url": callback_url,
+            **kwargs,
+        }
+        api_response = await self._api_instance.get_response(payload, config)
+        return Response(api_response)
+
+    async def scrape_product(
+        self,
+        product_id: str,
+        user_agent_type: Optional[str] = None,
+        render: Optional[str] = None,
+        callback_url: Optional[str] = None,
+        request_timeout: Optional[int] = 165,
+        job_completion_timeout: Optional[int] = None,
+        poll_interval: Optional[int] = None,
+        **kwargs
+    ) -> Response:
+        """
+        Asynchronously scrapes a Cdiscount product page for a given product ID.
+
+        Args:
+            product_id (str): The Cdiscount product ID identifying
+                the product page to scrape.
+
+            user_agent_type (Optional[str]): Device type and browser.
+            render (Optional[str]): Enables JavaScript rendering.
+            callback_url (Optional[str]): URL to your callback endpoint.
+            request_timeout (int | 165, optional): The interval in seconds for
+                the request to time out if no response is returned.
+                Defaults to 165.
+            poll_interval (Optional[int]): The interval in seconds to poll
+                the server for a response.
+            job_completion_timeout (Optional[int]): The interval in
+                seconds for the job to time out if no response is returned.
+
+        Returns:
+            Response: The response from the server after the job is completed.
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.CDISCOUNT_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Cdiscount URL. + + Args: + url (str): Direct URL to any Cdiscount page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.CDISCOUNT_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/european/idealo/__init__.py b/src/oxylabs/sources/european/idealo/__init__.py new file mode 100644 index 0000000..ae2d252 --- /dev/null +++ b/src/oxylabs/sources/european/idealo/__init__.py @@ -0,0 +1 @@ +from .idealo import Idealo, IdealoAsync diff --git a/src/oxylabs/sources/european/idealo/idealo.py b/src/oxylabs/sources/european/idealo/idealo.py new file mode 100644 index 0000000..d062e86 --- /dev/null +++ b/src/oxylabs/sources/european/idealo/idealo.py @@ -0,0 +1,113 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Idealo: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Idealo class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Idealo search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. 
+ request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.IDEALO_SEARCH, + "query": query, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class IdealoAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the IdealoAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Idealo search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.IDEALO_SEARCH, + "query": query, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/european/mediamarkt/__init__.py b/src/oxylabs/sources/european/mediamarkt/__init__.py new file mode 100644 index 0000000..00b3d6a --- /dev/null +++ b/src/oxylabs/sources/european/mediamarkt/__init__.py @@ -0,0 +1 @@ +from .mediamarkt import Mediamarkt, MediamarktAsync diff --git a/src/oxylabs/sources/european/mediamarkt/mediamarkt.py b/src/oxylabs/sources/european/mediamarkt/mediamarkt.py new file mode 100644 index 0000000..16df635 --- /dev/null +++ b/src/oxylabs/sources/european/mediamarkt/mediamarkt.py @@ -0,0 +1,305 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Mediamarkt: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Mediamarkt class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes MediaMarkt search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. 
Accepted values: + "de", "es", "pl", "nl", "hu", "ch", "at", "com.tr". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MEDIAMARKT_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a MediaMarkt product page for a given product ID. + + Args: + product_id (str): MediaMarkt product ID. + domain (Optional[str]): Domain localization. Accepted values: + "de", "es", "pl", "nl", "hu", "ch", "at", "com.tr". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MEDIAMARKT_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a MediaMarkt URL. + + Args: + url (str): Direct URL to any MediaMarkt page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MEDIAMARKT_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class MediamarktAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the MediamarktAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes MediaMarkt search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "de", "es", "pl", "nl", "hu", "ch", "at", "com.tr". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MEDIAMARKT_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a MediaMarkt product page for a given product ID. + + Args: + product_id (str): MediaMarkt product ID. + domain (Optional[str]): Domain localization. Accepted values: + "de", "es", "pl", "nl", "hu", "ch", "at", "com.tr". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MEDIAMARKT_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a MediaMarkt URL. + + Args: + url (str): Direct URL to any MediaMarkt page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MEDIAMARKT_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/google/google.py b/src/oxylabs/sources/google/google.py index 04afbf3..6ea3a71 100644 --- a/src/oxylabs/sources/google/google.py +++ b/src/oxylabs/sources/google/google.py @@ -511,6 +511,123 @@ def scrape_lens( api_response = self._api_instance.get_response(payload, config) return Response(api_response) + def scrape_ai_mode( + self, + query: str, + render: Optional[str] = "html", + parse: Optional[bool] = None, + geo_location: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs, + ) -> Response: + """ + Scrapes Google AI Mode results for a given query. + + Args: + query (str): The prompt or question to submit. Must be less than 400 symbols. + render (Optional[str]): Enables JavaScript rendering. Required for this + source, defaults to "html". + parse (Optional[bool]): true will return structured data. + geo_location (Optional[str]): The geographical location that the + result should be adapted for. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.GOOGLE_AI_MODE, + "query": query, + "render": render, + "parse": parse, + "geo_location": geo_location, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_news( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + pages: Optional[int] = None, + limit: Optional[int] = None, + locale: Optional[str] = None, + geo_location: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + parsing_instructions: Optional[dict] = None, + context: Optional[list] = None, + request_timeout: Optional[int] = 165, + **kwargs, + ) -> Response: + """ + Scrapes Google News search results for a given query. + + Args: + query (str): The search query. + domain (Optional[str]): The domain to limit the search results to. + start_page (Optional[int]): The starting page number. + pages (Optional[int]): The number of pages to scrape. + limit (Optional[int]): Number of results to retrieve in each page. + locale (Optional[str]): Accept-Language header value which changes page web interface language. + geo_location (Optional[str]): The geographical location that the + result should be adapted for. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): true will return structured data. + parsing_instructions (Optional[dict]): Instructions for parsing the results. + context (Optional[list]): Context parameters. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. 
+ + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.GOOGLE_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "pages": pages, + "limit": limit, + "locale": locale, + "geo_location": geo_location, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + "parsing_instructions": parsing_instructions, + "context": context, + **kwargs, + } + payload["context"] = payload.get("context") or [] + + for item in payload["context"]: + if item.get("key") == "tbm": + item["value"] = item.get("value", "nws") + break + else: + payload["context"].append({"key": "tbm", "value": "nws"}) + + check_parsing_instructions_validity(parsing_instructions) + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + class GoogleAsync: def __init__(self, api_instance:AsyncAPI) -> None: """ @@ -1111,3 +1228,142 @@ async def scrape_lens( } api_response = await self._api_instance.get_response(payload, config) return Response(api_response) + + async def scrape_ai_mode( + self, + query: str, + render: Optional[str] = "html", + parse: Optional[bool] = None, + geo_location: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs, + ) -> Response: + """ + Asynchronously scrapes Google AI Mode results for a given query. + + Args: + query (str): The prompt or question to submit. Must be less than 400 symbols. + render (Optional[str]): Enables JavaScript rendering. Required for this + source, defaults to "html". + parse (Optional[bool]): true will return structured data. + geo_location (Optional[str]): The geographical location that the + result should be adapted for. 
+ callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.GOOGLE_AI_MODE, + "query": query, + "render": render, + "parse": parse, + "geo_location": geo_location, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_news( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + pages: Optional[int] = None, + limit: Optional[int] = None, + locale: Optional[str] = None, + geo_location: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + parsing_instructions: Optional[dict] = None, + context: Optional[list] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs, + ) -> Response: + """ + Asynchronously scrapes Google News search results for a given query. + + Args: + query (str): The search query. + domain (Optional[str]): The domain to limit the search results to. + start_page (Optional[int]): The starting page number. + pages (Optional[int]): The number of pages to scrape. + limit (Optional[int]): Number of results to retrieve in each page. 
+ locale (Optional[str]): Accept-Language header value which changes page web interface language. + geo_location (Optional[str]): The geographical location that the + result should be adapted for. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): true will return structured data. + parsing_instructions (Optional[dict]): Instructions for parsing the results. + context (Optional[list]): Context parameters. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.GOOGLE_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "pages": pages, + "limit": limit, + "locale": locale, + "geo_location": geo_location, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + "parsing_instructions": parsing_instructions, + "context": context, + **kwargs, + } + payload["context"] = payload.get("context") or [] + + for item in payload["context"]: + if item.get("key") == "tbm": + item["value"] = item.get("value", "nws") + break + else: + payload["context"].append({"key": "tbm", "value": "nws"}) + + check_parsing_instructions_validity(parsing_instructions) + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git 
a/src/oxylabs/sources/google_shopping/google_shopping.py b/src/oxylabs/sources/google_shopping/google_shopping.py index 78c4d5a..7e8ebed 100644 --- a/src/oxylabs/sources/google_shopping/google_shopping.py +++ b/src/oxylabs/sources/google_shopping/google_shopping.py @@ -187,67 +187,6 @@ def scrape_shopping_products( api_response = self._api_instance.get_response(payload, config) return Response(api_response) - def scrape_product_pricing( - self, - query: str, - domain: Optional[str] = None, - start_page: Optional[int] = None, - pages: Optional[int] = None, - locale: Optional[str] = None, - results_language: Optional[str] = None, - geo_location: Optional[str] = None, - user_agent_type: Optional[str] = None, - render: Optional[str] = None, - callback_url: Optional[str] = None, - parse: Optional[bool] = None, - parsing_instructions: Optional[dict] = None, - request_timeout: Optional[int] = 165, - **kwargs - ) -> Response: - """ - Scrapes Google Shopping product pricing results for a given product code. - - Args: - query (str): UTF-encoded product code. - domain (Optional[str]): The domain to limit the search results to. - locale (Optional[str]): Accept-Language header value which changes page web interface language. - start_page (Optional[int]): The starting page number. - pages (Optional[int]): The number of pages to scrape. - "results_language": None, - geo_location (Optional[str]): None, - user_agent_type (Optional[str]): Device type and browser. - render (Optional[str]): Enables JavaScript rendering. - callback_url (Optional[str]): URL to your callback endpoint. - parse (Optional[bool]): true will return structured data. - parsing_instructions (Optional[dict]): Instructions for parsing the results. - request_timeout (int | 165, optional): The interval in seconds for - the request to time out if no response is returned. - Defaults to 165. - Returns: - Response: The response from the server after the job is completed. 
- """ - - config = prepare_config(request_timeout=request_timeout) - payload = { - "source": source.GOOGLE_SHOPPING_PRICING, - "domain": domain, - "query": query, - "start_page": start_page, - "pages": pages, - "locale": locale, - "results_language": results_language, - "geo_location": geo_location, - "user_agent_type": user_agent_type, - "render": render, - "callback_url": callback_url, - "parse": parse, - "parsing_instructions": parsing_instructions, - **kwargs, - } - check_parsing_instructions_validity(parsing_instructions) - api_response = self._api_instance.get_response(payload, config) - return Response(api_response) - class GoogleShoppingAsync: def __init__(self, api_instance:AsyncAPI) -> None: """ @@ -457,74 +396,3 @@ async def scrape_shopping_products( api_response = await self._api_instance.get_response(payload, config) return Response(api_response) - async def scrape_product_pricing( - self, - query: str, - domain: Optional[str] = None, - start_page: Optional[int] = None, - pages: Optional[int] = None, - locale: Optional[str] = None, - results_language: Optional[str] = None, - geo_location: Optional[str] = None, - user_agent_type: Optional[str] = None, - render: Optional[str] = None, - callback_url: Optional[str] = None, - parse: Optional[bool] = None, - parsing_instructions: Optional[dict] = None, - request_timeout: Optional[int] = 165, - job_completion_timeout: Optional[int] = None, - poll_interval: Optional[int] = None, - **kwargs - ) -> Response: - """ - Scrapes Google Shopping product pricing results for a given product code. - - Args: - url (str): UTF-encoded product code. - domain (Optional[str]): The domain to limit the search results to. - start_page (Optional[int]): The starting page number. - pages (Optional[int]): The number of pages to scrape. - locale (Optional[str]): Accept-Language header value which changes page web interface language. 
- "results_language": None, - geo_location (Optional[str]): None, - user_agent_type (Optional[str]): Device type and browser. - render (Optional[str]): Enables JavaScript rendering. - callback_url (Optional[str]): URL to your callback endpoint. - parse (Optional[bool]): true will return structured data. - parsing_instructions (Optional[dict]): Instructions for parsing the results. - request_timeout (int | 165, optional): The interval in seconds for - the request to time out if no response is returned. - Defaults to 165. - poll_interval (Optional[int]): The interval in seconds to poll - the server for a response. - job_completion_timeout (Optional[int]): The interval in - seconds for the job to time out if no response is returned. - Returns: - Response: The response from the server after the job is completed. - """ - - config = prepare_config( - request_timeout=request_timeout, - poll_interval=poll_interval, - job_completion_timeout=job_completion_timeout, - async_integration=True, - ) - payload = { - "source": source.GOOGLE_SHOPPING_PRICING, - "domain": domain, - "query": query, - "start_page": start_page, - "pages": pages, - "locale": locale, - "results_language": results_language, - "geo_location": geo_location, - "user_agent_type": user_agent_type, - "render": render, - "callback_url": callback_url, - "parse": parse, - "parsing_instructions": parsing_instructions, - **kwargs, - } - check_parsing_instructions_validity(parsing_instructions) - api_response = await self._api_instance.get_response(payload, config) - return Response(api_response) diff --git a/src/oxylabs/sources/kroger/kroger.py b/src/oxylabs/sources/kroger/kroger.py index 5ca7f88..5f2489b 100644 --- a/src/oxylabs/sources/kroger/kroger.py +++ b/src/oxylabs/sources/kroger/kroger.py @@ -71,6 +71,7 @@ def scrape_search( store_id: Optional[int] = None, delivery_zip: Optional[str] = None, fulfillment_type: Optional[str] = None, + context: Optional[list] = None, request_timeout: Optional[int] = 165, 
**kwargs ) -> Response: @@ -85,6 +86,7 @@ def scrape_search( store_id (Optional[int]): The store ID. delivery_zip (Optional[str]): The delivery location ZIP code. fulfillment_type (Optional[str]): The Fulfillment method. + context (Optional[list]): Context parameters (price_range, brand). request_timeout (int | 165, optional): The interval in seconds for the request to time out if no response is returned. Defaults to 165. @@ -103,6 +105,7 @@ def scrape_search( "store_id": store_id, "delivery_zip": delivery_zip, "fulfillment_type": fulfillment_type, + "context": context, **kwargs, } api_response = self._api_instance.get_response(payload, config) @@ -231,6 +234,7 @@ async def scrape_search( store_id: Optional[int] = None, delivery_zip: Optional[str] = None, fulfillment_type: Optional[str] = None, + context: Optional[list] = None, request_timeout: Optional[int] = 165, job_completion_timeout: Optional[int] = None, poll_interval: Optional[int] = None, @@ -247,6 +251,7 @@ async def scrape_search( store_id (Optional[int]): The store ID. delivery_zip (Optional[str]): The delivery location ZIP code. fulfillment_type (Optional[str]): The Fulfillment method. + context (Optional[list]): Context parameters (price_range, brand). request_timeout (int | 165, optional): The interval in seconds for the request to time out if no response is returned. Defaults to 165. 
@@ -274,6 +279,7 @@ async def scrape_search( "store_id": store_id, "delivery_zip": delivery_zip, "fulfillment_type": fulfillment_type, + "context": context, **kwargs, } api_response = await self._api_instance.get_response(payload, config) diff --git a/src/oxylabs/sources/latin_american/__init__.py b/src/oxylabs/sources/latin_american/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/oxylabs/sources/latin_american/dcard/__init__.py b/src/oxylabs/sources/latin_american/dcard/__init__.py new file mode 100644 index 0000000..02b732c --- /dev/null +++ b/src/oxylabs/sources/latin_american/dcard/__init__.py @@ -0,0 +1 @@ +from .dcard import Dcard, DcardAsync diff --git a/src/oxylabs/sources/latin_american/dcard/dcard.py b/src/oxylabs/sources/latin_american/dcard/dcard.py new file mode 100644 index 0000000..c66dd00 --- /dev/null +++ b/src/oxylabs/sources/latin_american/dcard/dcard.py @@ -0,0 +1,113 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Dcard: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Dcard class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Dcard search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. 
+ request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.DCARD_SEARCH, + "query": query, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class DcardAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the DcardAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Dcard search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.DCARD_SEARCH, + "query": query, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/latin_american/falabella/__init__.py b/src/oxylabs/sources/latin_american/falabella/__init__.py new file mode 100644 index 0000000..307d007 --- /dev/null +++ b/src/oxylabs/sources/latin_american/falabella/__init__.py @@ -0,0 +1 @@ +from .falabella import Falabella, FalabellaAsync diff --git a/src/oxylabs/sources/latin_american/falabella/falabella.py b/src/oxylabs/sources/latin_american/falabella/falabella.py new file mode 100644 index 0000000..5251524 --- /dev/null +++ b/src/oxylabs/sources/latin_american/falabella/falabella.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Falabella: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Falabella class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Falabella search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". 
+ start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.FALABELLA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Falabella product page for a given product ID. + + Args: + product_id (str): Falabella product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.FALABELLA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Falabella URL. + + Args: + url (str): Direct URL to any Falabella page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.FALABELLA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class FalabellaAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the FalabellaAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Falabella search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.FALABELLA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Falabella product page for a given product ID. + + Args: + product_id (str): Falabella product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.FALABELLA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Falabella URL. + + Args: + url (str): Direct URL to any Falabella page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.FALABELLA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/latin_american/magazineluiza/__init__.py b/src/oxylabs/sources/latin_american/magazineluiza/__init__.py new file mode 100644 index 0000000..06cb2e9 --- /dev/null +++ b/src/oxylabs/sources/latin_american/magazineluiza/__init__.py @@ -0,0 +1 @@ +from .magazineluiza import Magazineluiza, MagazineluizaAsync diff --git a/src/oxylabs/sources/latin_american/magazineluiza/magazineluiza.py b/src/oxylabs/sources/latin_american/magazineluiza/magazineluiza.py new file mode 100644 index 0000000..9110981 --- /dev/null +++ b/src/oxylabs/sources/latin_american/magazineluiza/magazineluiza.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Magazineluiza: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Magazineluiza class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Magazine Luiza search results for a given query. + + Args: + query (str): The keyword or phrase to search for. 
+ "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MAGAZINELUIZA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Magazine Luiza product page for a given product ID. + + Args: + product_id (str): Magazine Luiza product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MAGAZINELUIZA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Magazine Luiza URL. + + Args: + url (str): Direct URL to any Magazine Luiza page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MAGAZINELUIZA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class MagazineluizaAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the MagazineluizaAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Magazine Luiza search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MAGAZINELUIZA_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Magazine Luiza product page for a given product ID. + + Args: + product_id (str): Magazine Luiza product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MAGAZINELUIZA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Magazine Luiza URL. + + Args: + url (str): Direct URL to any Magazine Luiza page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MAGAZINELUIZA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/latin_american/mercadolibre/__init__.py b/src/oxylabs/sources/latin_american/mercadolibre/__init__.py new file mode 100644 index 0000000..ceaa995 --- /dev/null +++ b/src/oxylabs/sources/latin_american/mercadolibre/__init__.py @@ -0,0 +1 @@ +from .mercadolibre import Mercadolibre, MercadolibreAsync diff --git a/src/oxylabs/sources/latin_american/mercadolibre/mercadolibre.py b/src/oxylabs/sources/latin_american/mercadolibre/mercadolibre.py new file mode 100644 index 0000000..9c73a47 --- /dev/null +++ b/src/oxylabs/sources/latin_american/mercadolibre/mercadolibre.py @@ -0,0 +1,297 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Mercadolibre: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Mercadolibre class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Mercado Libre search results for a given query. + + Args: + query (str): The keyword or phrase to search for. 
+ "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MERCADOLIBRE_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Mercado Libre product page for a given product ID. + + Args: + product_id (str): Mercado Libre product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MERCADOLIBRE_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Mercado Libre URL. + + Args: + url (str): Direct URL to any Mercado Libre page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MERCADOLIBRE_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class MercadolibreAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the MercadolibreAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Mercado Libre search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MERCADOLIBRE_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Mercado Libre product page for a given product ID. + + Args: + product_id (str): Mercado Libre product ID. + "com", "co.uk", "de", "ca", "com.au", "it", + "pl", "ph", "com.hk", "com.sg". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MERCADOLIBRE_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Mercado Libre URL. + + Args: + url (str): Direct URL to any Mercado Libre page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MERCADOLIBRE_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/latin_american/mercadolivre/__init__.py b/src/oxylabs/sources/latin_american/mercadolivre/__init__.py new file mode 100644 index 0000000..b11f486 --- /dev/null +++ b/src/oxylabs/sources/latin_american/mercadolivre/__init__.py @@ -0,0 +1 @@ +from .mercadolivre import Mercadolivre, MercadolivreAsync diff --git a/src/oxylabs/sources/latin_american/mercadolivre/mercadolivre.py b/src/oxylabs/sources/latin_american/mercadolivre/mercadolivre.py new file mode 100644 index 0000000..b14cf36 --- /dev/null +++ b/src/oxylabs/sources/latin_american/mercadolivre/mercadolivre.py @@ -0,0 +1,204 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Mercadolivre: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Mercadolivre class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Mercado Livre search results for a given query. + + Args: + query (str): The keyword or phrase to search for. 
+ start_page (Optional[int]): Starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MERCADOLIVRE_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Mercado Livre product page for a given product ID. + + Args: + product_id (str): Mercado Livre product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MERCADOLIVRE_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class MercadolivreAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the MercadolivreAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Mercado Livre search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MERCADOLIVRE_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Mercado Livre product page for a given product ID. + + Args: + product_id (str): Mercado Livre product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MERCADOLIVRE_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/__init__.py b/src/oxylabs/sources/north_american/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/oxylabs/sources/north_american/bedbathandbeyond/__init__.py b/src/oxylabs/sources/north_american/bedbathandbeyond/__init__.py new file mode 100644 index 0000000..79dc5ff --- /dev/null +++ b/src/oxylabs/sources/north_american/bedbathandbeyond/__init__.py @@ -0,0 +1 @@ +from .bedbathandbeyond import Bedbathandbeyond, BedbathandbeyondAsync diff --git a/src/oxylabs/sources/north_american/bedbathandbeyond/bedbathandbeyond.py b/src/oxylabs/sources/north_american/bedbathandbeyond/bedbathandbeyond.py new file mode 100644 index 0000000..bc1b00a --- /dev/null +++ b/src/oxylabs/sources/north_american/bedbathandbeyond/bedbathandbeyond.py @@ -0,0 +1,289 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Bedbathandbeyond: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Bedbathandbeyond class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Bed Bath & Beyond search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BEDBATHANDBEYOND_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Bed Bath & Beyond product page for a given product ID. + + Args: + product_id (str): 8-digit Bed Bath & Beyond product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. 
+ + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BEDBATHANDBEYOND_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Bed Bath & Beyond URL. + + Args: + url (str): Direct URL to any Bed Bath & Beyond page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BEDBATHANDBEYOND_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class BedbathandbeyondAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the BedbathandbeyondAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Bed Bath & Beyond search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BEDBATHANDBEYOND_SEARCH, + "query": query, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Bed Bath & Beyond product page for a given product ID. + + Args: + product_id (str): 8-digit Bed Bath & Beyond product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BEDBATHANDBEYOND_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Bed Bath & Beyond URL. + + Args: + url (str): Direct URL to any Bed Bath & Beyond page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BEDBATHANDBEYOND_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/bestbuy/__init__.py b/src/oxylabs/sources/north_american/bestbuy/__init__.py new file mode 100644 index 0000000..c520dac --- /dev/null +++ b/src/oxylabs/sources/north_american/bestbuy/__init__.py @@ -0,0 +1 @@ +from .bestbuy import Bestbuy, BestbuyAsync diff --git a/src/oxylabs/sources/north_american/bestbuy/bestbuy.py b/src/oxylabs/sources/north_american/bestbuy/bestbuy.py new file mode 100644 index 0000000..4be472a --- /dev/null +++ b/src/oxylabs/sources/north_american/bestbuy/bestbuy.py @@ -0,0 +1,254 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Bestbuy: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Bestbuy class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Best Buy search results for a given query. 
+ + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: "com", "ca". + start_page (Optional[int]): Starting page number. + store_id (Optional[int]): Specify a store. + delivery_zip (Optional[str]): Set the delivery ZIP location. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "shipping". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BESTBUY_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Best Buy product page for a given product ID. + + Args: + product_id (str): 7-symbol product ID. + domain (Optional[str]): Domain localization. Accepted values: "com", "ca". + store_id (Optional[int]): Specify a store. + delivery_zip (Optional[str]): Set the delivery ZIP location. 
+ user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BESTBUY_PRODUCT, + "product_id": product_id, + "domain": domain, + "store_id": store_id, + "delivery_zip": delivery_zip, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class BestbuyAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the BestbuyAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Best Buy search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: "com", "ca". + start_page (Optional[int]): Starting page number. + store_id (Optional[int]): Specify a store. 
+ delivery_zip (Optional[str]): Set the delivery ZIP location. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "shipping". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BESTBUY_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Best Buy product page for a given product ID. + + Args: + product_id (str): 7-symbol product ID. 
+ domain (Optional[str]): Domain localization. Accepted values: "com", "ca". + store_id (Optional[int]): Specify a store. + delivery_zip (Optional[str]): Set the delivery ZIP location. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BESTBUY_PRODUCT, + "product_id": product_id, + "domain": domain, + "store_id": store_id, + "delivery_zip": delivery_zip, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/bodegaaurrera/__init__.py b/src/oxylabs/sources/north_american/bodegaaurrera/__init__.py new file mode 100644 index 0000000..3f193a3 --- /dev/null +++ b/src/oxylabs/sources/north_american/bodegaaurrera/__init__.py @@ -0,0 +1 @@ +from .bodegaaurrera import Bodegaaurrera, BodegaaurreraAsync diff --git a/src/oxylabs/sources/north_american/bodegaaurrera/bodegaaurrera.py b/src/oxylabs/sources/north_american/bodegaaurrera/bodegaaurrera.py new file mode 100644 index 0000000..a7bcdd0 --- /dev/null +++ 
b/src/oxylabs/sources/north_american/bodegaaurrera/bodegaaurrera.py @@ -0,0 +1,295 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Bodegaaurrera: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Bodegaaurrera class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + subdomain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Bodega Aurrera search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + subdomain (Optional[str]): Subdomain localization. Accepts only "despensa". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BODEGAAURRERA_SEARCH, + "query": query, + "start_page": start_page, + "subdomain": subdomain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Bodega Aurrera product page for a given product ID. + + Args: + product_id (str): Bodega Aurrera product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BODEGAAURRERA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Bodega Aurrera URL. + + Args: + url (str): Direct URL to any Bodega Aurrera page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. 
+ callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.BODEGAAURRERA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class BodegaaurreraAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the BodegaaurreraAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + subdomain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Bodega Aurrera search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + subdomain (Optional[str]): Subdomain localization. Accepts only "despensa". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. 
+ job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BODEGAAURRERA_SEARCH, + "query": query, + "start_page": start_page, + "subdomain": subdomain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Bodega Aurrera product page for a given product ID. + + Args: + product_id (str): Bodega Aurrera product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BODEGAAURRERA_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Bodega Aurrera URL. + + Args: + url (str): Direct URL to any Bodega Aurrera page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.BODEGAAURRERA_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/costco/__init__.py b/src/oxylabs/sources/north_american/costco/__init__.py new file mode 100644 index 0000000..8b2fbc5 --- /dev/null +++ b/src/oxylabs/sources/north_american/costco/__init__.py @@ -0,0 +1 @@ +from .costco import Costco, CostcoAsync diff --git a/src/oxylabs/sources/north_american/costco/costco.py b/src/oxylabs/sources/north_american/costco/costco.py new file mode 100644 index 0000000..96774f6 --- /dev/null +++ b/src/oxylabs/sources/north_american/costco/costco.py @@ -0,0 +1,309 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Costco: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Costco class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Costco search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. 
Accepted values: + "com", "ca", "co.uk", "com.au", "com.mx", "fr", + "co.jp", "is", "co.nz", "co.kr", "es", "com.tw". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.COSTCO_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Costco product page for a given product ID. + + Args: + product_id (str): Costco product ID. + domain (Optional[str]): Domain localization. Accepted values: + "com", "ca", "co.uk", "com.au", "com.mx", "fr", + "co.jp", "is", "co.nz", "co.kr", "es", "com.tw". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.COSTCO_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Costco URL. + + Args: + url (str): Direct URL to any Costco page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.COSTCO_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class CostcoAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the CostcoAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Costco search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "com", "ca", "co.uk", "com.au", "com.mx", "fr", + "co.jp", "is", "co.nz", "co.kr", "es", "com.tw". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.COSTCO_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Costco product page for a given product ID. + + Args: + product_id (str): Costco product ID. + domain (Optional[str]): Domain localization. Accepted values: + "com", "ca", "co.uk", "com.au", "com.mx", "fr", + "co.jp", "is", "co.nz", "co.kr", "es", "com.tw". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.COSTCO_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Costco URL. + + Args: + url (str): Direct URL to any Costco page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.COSTCO_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/grainger/__init__.py b/src/oxylabs/sources/north_american/grainger/__init__.py new file mode 100644 index 0000000..cc8bac3 --- /dev/null +++ b/src/oxylabs/sources/north_american/grainger/__init__.py @@ -0,0 +1 @@ +from .grainger import Grainger, GraingerAsync diff --git a/src/oxylabs/sources/north_american/grainger/grainger.py b/src/oxylabs/sources/north_american/grainger/grainger.py new file mode 100644 index 0000000..c9f50a7 --- /dev/null +++ b/src/oxylabs/sources/north_american/grainger/grainger.py @@ -0,0 +1,305 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Grainger: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Grainger class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Grainger search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (str): Domain localization. Accepted values: + "com", "com.mx", "ca". 
+ start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.GRAINGER_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + domain: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Grainger product page for a given product ID. + + Args: + product_id (str): Grainger product ID. + domain (str): Domain localization. Accepted values: + "com", "com.mx", "ca". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.GRAINGER_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Grainger URL. + + Args: + url (str): Direct URL to any Grainger page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.GRAINGER_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class GraingerAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the GraingerAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: str, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Grainger search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (str): Domain localization. Accepted values: + "com", "com.mx", "ca". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.GRAINGER_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + domain: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Grainger product page for a given product ID. + + Args: + product_id (str): Grainger product ID. + domain (str): Domain localization. Accepted values: + "com", "com.mx", "ca". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.GRAINGER_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Grainger URL. + + Args: + url (str): Direct URL to any Grainger page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.GRAINGER_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/instacart/__init__.py b/src/oxylabs/sources/north_american/instacart/__init__.py new file mode 100644 index 0000000..ba59bfb --- /dev/null +++ b/src/oxylabs/sources/north_american/instacart/__init__.py @@ -0,0 +1 @@ +from .instacart import Instacart, InstacartAsync diff --git a/src/oxylabs/sources/north_american/instacart/instacart.py b/src/oxylabs/sources/north_american/instacart/instacart.py new file mode 100644 index 0000000..ebeaba5 --- /dev/null +++ b/src/oxylabs/sources/north_american/instacart/instacart.py @@ -0,0 +1,305 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Instacart: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Instacart class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Instacart search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. 
Accepted values: + "com", "ca". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.INSTACART_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an Instacart product page for a given product ID. + + Args: + product_id (str): Instacart product ID. + domain (Optional[str]): Domain localization. Accepted values: + "com", "ca". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.INSTACART_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from an Instacart URL. + + Args: + url (str): Direct URL to any Instacart page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.INSTACART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class InstacartAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the InstacartAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Instacart search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "com", "ca". + start_page (Optional[int]): The starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.INSTACART_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an Instacart product page for a given product ID. + + Args: + product_id (str): Instacart product ID. + domain (Optional[str]): Domain localization. Accepted values: + "com", "ca". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.INSTACART_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from an Instacart URL. + + Args: + url (str): Direct URL to any Instacart page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.INSTACART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/lowes/__init__.py b/src/oxylabs/sources/north_american/lowes/__init__.py new file mode 100644 index 0000000..7972493 --- /dev/null +++ b/src/oxylabs/sources/north_american/lowes/__init__.py @@ -0,0 +1 @@ +from .lowes import Lowes, LowesAsync diff --git a/src/oxylabs/sources/north_american/lowes/lowes.py b/src/oxylabs/sources/north_american/lowes/lowes.py new file mode 100644 index 0000000..0dad69d --- /dev/null +++ b/src/oxylabs/sources/north_american/lowes/lowes.py @@ -0,0 +1,313 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Lowes: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Lowes class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + free_delivery: Optional[bool] = None, + pickup_today: Optional[bool] = None, + delivery_today_tomorrow: Optional[bool] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Lowe's search results for a given query. 
+ + Args: + query (str): The keyword or phrase to search for. + store_id (Optional[int]): Specify a store. + delivery_zip (Optional[str]): Delivery location ZIP code. + free_delivery (Optional[bool]): Filter for free delivery products. + pickup_today (Optional[bool]): Filter for same day pickup availability. + delivery_today_tomorrow (Optional[bool]): Filter for same/next day delivery. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.LOWES_SEARCH, + "query": query, + "store_id": store_id, + "delivery_zip": delivery_zip, + "free_delivery": free_delivery, + "pickup_today": pickup_today, + "delivery_today_tomorrow": delivery_today_tomorrow, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Lowe's product page for a given product ID. + + Args: + product_id (str): Lowe's product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. 
+ + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.LOWES_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Lowe's URL. + + Args: + url (str): Direct URL to any Lowe's page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.LOWES_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class LowesAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the LowesAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + free_delivery: Optional[bool] = None, + pickup_today: Optional[bool] = None, + delivery_today_tomorrow: Optional[bool] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Lowe's search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + store_id (Optional[int]): Specify a store. + delivery_zip (Optional[str]): Delivery location ZIP code. + free_delivery (Optional[bool]): Filter for free delivery products. + pickup_today (Optional[bool]): Filter for same day pickup availability. + delivery_today_tomorrow (Optional[bool]): Filter for same/next day delivery. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.LOWES_SEARCH, + "query": query, + "store_id": store_id, + "delivery_zip": delivery_zip, + "free_delivery": free_delivery, + "pickup_today": pickup_today, + "delivery_today_tomorrow": delivery_today_tomorrow, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Lowe's product page for a given product ID. + + Args: + product_id (str): Lowe's product ID. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.LOWES_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Lowe's URL. + + Args: + url (str): Direct URL to any Lowe's page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.LOWES_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/menards/__init__.py b/src/oxylabs/sources/north_american/menards/__init__.py new file mode 100644 index 0000000..c2485a9 --- /dev/null +++ b/src/oxylabs/sources/north_american/menards/__init__.py @@ -0,0 +1 @@ +from .menards import Menards, MenardsAsync diff --git a/src/oxylabs/sources/north_american/menards/menards.py b/src/oxylabs/sources/north_american/menards/menards.py new file mode 100644 index 0000000..2f05f09 --- /dev/null +++ b/src/oxylabs/sources/north_american/menards/menards.py @@ -0,0 +1,331 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Menards: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Menards class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[str] = None, + pickup_at_store_eligible: Optional[bool] = None, + in_stock_today: Optional[bool] = None, + fulfillment_center: Optional[bool] = None, + delivery_eligible: Optional[bool] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Menards search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + store_id (Optional[str]): Store ID from URL parameter. + pickup_at_store_eligible (Optional[bool]): Pickup available at store. + in_stock_today (Optional[bool]): Item in stock today. + fulfillment_center (Optional[bool]): Ship from Menards warehouse. + delivery_eligible (Optional[bool]): Qualifies for home delivery. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MENARDS_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "pickup_at_store_eligible": pickup_at_store_eligible, + "in_stock_today": in_stock_today, + "fulfillment_center": fulfillment_center, + "delivery_eligible": delivery_eligible, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + store_id: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Menards product page for a given product ID. + + Args: + product_id (str): Menards product ID. + store_id (Optional[str]): Store ID from URL parameter. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MENARDS_PRODUCT, + "product_id": product_id, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + store_id: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Menards URL. + + Args: + url (str): Direct URL to any Menards page. + store_id (Optional[str]): Store ID from URL parameter. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.MENARDS_URL, + "url": url, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class MenardsAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the MenardsAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[str] = None, + pickup_at_store_eligible: Optional[bool] = None, + in_stock_today: Optional[bool] = None, + fulfillment_center: Optional[bool] = None, + delivery_eligible: Optional[bool] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Menards search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + store_id (Optional[str]): Store ID from URL parameter. + pickup_at_store_eligible (Optional[bool]): Pickup available at store. + in_stock_today (Optional[bool]): Item in stock today. + fulfillment_center (Optional[bool]): Ship from Menards warehouse. + delivery_eligible (Optional[bool]): Qualifies for home delivery. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MENARDS_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "pickup_at_store_eligible": pickup_at_store_eligible, + "in_stock_today": in_stock_today, + "fulfillment_center": fulfillment_center, + "delivery_eligible": delivery_eligible, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + store_id: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Menards product page for a given product ID. + + Args: + product_id (str): Menards product ID. + store_id (Optional[str]): Store ID from URL parameter. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MENARDS_PRODUCT, + "product_id": product_id, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + store_id: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Menards URL. + + Args: + url (str): Direct URL to any Menards page. + store_id (Optional[str]): Store ID from URL parameter. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.MENARDS_URL, + "url": url, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/petco/__init__.py b/src/oxylabs/sources/north_american/petco/__init__.py new file mode 100644 index 0000000..cbc76ca --- /dev/null +++ b/src/oxylabs/sources/north_american/petco/__init__.py @@ -0,0 +1 @@ +from .petco import Petco, PetcoAsync diff --git a/src/oxylabs/sources/north_american/petco/petco.py b/src/oxylabs/sources/north_american/petco/petco.py new file mode 100644 index 0000000..10db1bd --- /dev/null +++ b/src/oxylabs/sources/north_american/petco/petco.py @@ -0,0 +1,224 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Petco: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Petco class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Petco search results for a given query. + + Args: + query (str): The keyword or phrase to search for. 
+ start_page (Optional[int]): The starting page number. + store_id (Optional[str]): Specify a store. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "repeat_delivery", "free_pickup_today", "same_day_delivery". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.PETCO_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Petco URL. + + Args: + url (str): Direct URL to any Petco page. + store_id (Optional[int]): Specify a Petco store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.PETCO_URL, + "url": url, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class PetcoAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the PetcoAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Petco search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): The starting page number. + store_id (Optional[str]): Specify a store. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "repeat_delivery", "free_pickup_today", "same_day_delivery". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. 
+ + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.PETCO_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Petco URL. + + Args: + url (str): Direct URL to any Petco page. + store_id (Optional[int]): Specify a Petco store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.PETCO_URL, + "url": url, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/publix/__init__.py b/src/oxylabs/sources/north_american/publix/__init__.py new file mode 100644 index 0000000..6ee7f0c --- /dev/null +++ b/src/oxylabs/sources/north_american/publix/__init__.py @@ -0,0 +1 @@ +from .publix import Publix, PublixAsync diff --git a/src/oxylabs/sources/north_american/publix/publix.py b/src/oxylabs/sources/north_american/publix/publix.py new file mode 100644 index 0000000..0d2c553 --- /dev/null +++ b/src/oxylabs/sources/north_american/publix/publix.py @@ -0,0 +1,307 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Publix: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Publix class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Publix search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. 
+ store_id (Optional[int]): Specify a store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.PUBLIX_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Publix product page for a given product ID. + + Args: + product_id (str): Publix product ID. + store_id (Optional[int]): Specify a store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.PUBLIX_PRODUCT, + "product_id": product_id, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Publix URL. + + Args: + url (str): Direct URL to any Publix page. + store_id (Optional[int]): Specify a store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.PUBLIX_URL, + "url": url, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class PublixAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the PublixAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + start_page: Optional[int] = None, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Publix search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + start_page (Optional[int]): Starting page number. + store_id (Optional[int]): Specify a store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.PUBLIX_SEARCH, + "query": query, + "start_page": start_page, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Publix product page for a given product ID. + + Args: + product_id (str): Publix product ID. + store_id (Optional[int]): Specify a store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.PUBLIX_PRODUCT, + "product_id": product_id, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + store_id: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Publix URL. + + Args: + url (str): Direct URL to any Publix page. + store_id (Optional[int]): Specify a store. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.PUBLIX_URL, + "url": url, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/staples/__init__.py b/src/oxylabs/sources/north_american/staples/__init__.py new file mode 100644 index 0000000..dd02811 --- /dev/null +++ b/src/oxylabs/sources/north_american/staples/__init__.py @@ -0,0 +1 @@ +from .staples import Staples, StaplesAsync diff --git a/src/oxylabs/sources/north_american/staples/staples.py b/src/oxylabs/sources/north_american/staples/staples.py new file mode 100644 index 0000000..a1c82ad --- /dev/null +++ b/src/oxylabs/sources/north_american/staples/staples.py @@ -0,0 +1,127 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Staples: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Staples class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Staples search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. 
Accepted values: + "com", "ca", "co.uk". + start_page (Optional[int]): Starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.STAPLES_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class StaplesAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the StaplesAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Staples search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization. Accepted values: + "com", "ca", "co.uk". + start_page (Optional[int]): Starting page number. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. 
+ request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.STAPLES_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/target_store/__init__.py b/src/oxylabs/sources/north_american/target_store/__init__.py new file mode 100644 index 0000000..fa15162 --- /dev/null +++ b/src/oxylabs/sources/north_american/target_store/__init__.py @@ -0,0 +1 @@ +from .target_store import TargetStore, TargetStoreAsync diff --git a/src/oxylabs/sources/north_american/target_store/target_store.py b/src/oxylabs/sources/north_american/target_store/target_store.py new file mode 100644 index 0000000..48ea929 --- /dev/null +++ b/src/oxylabs/sources/north_american/target_store/target_store.py @@ -0,0 +1,452 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class TargetStore: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the TargetStore class. 
+ + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Target search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "shipping", "shop_in_store", "same_day_delivery". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): Returns parsed data when set to true. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TARGET_SEARCH, + "query": query, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Target product page for a given product ID. + + Args: + product_id (str): Target product ID. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "delivery", "shipping". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): Returns parsed data when set to true. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TARGET_PRODUCT, + "product_id": product_id, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_category( + self, + category_id: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Target category page for a given category ID. + + Args: + category_id (str): Target taxonomy node ID. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "shipping", "shop_in_store", "same_day_delivery". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TARGET_CATEGORY, + "category_id": category_id, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Target URL. + + Args: + url (str): Direct URL to any Target page. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TARGET_URL, + "url": url, + "store_id": store_id, + "delivery_zip": delivery_zip, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class TargetStoreAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the TargetStoreAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Target search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "shipping", "shop_in_store", "same_day_delivery". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): Returns parsed data when set to true. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TARGET_SEARCH, + "query": query, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Target product page for a given product ID. + + Args: + product_id (str): Target product ID. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "delivery", "shipping". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): Returns parsed data when set to true. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. 
+ + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TARGET_PRODUCT, + "product_id": product_id, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_category( + self, + category_id: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Target category page for a given category ID. + + Args: + category_id (str): Target taxonomy node ID. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + fulfillment_type (Optional[str]): Order fulfillment method. + Accepted values: "pickup", "shipping", "shop_in_store", "same_day_delivery". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. 
+ job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TARGET_CATEGORY, + "category_id": category_id, + "store_id": store_id, + "delivery_zip": delivery_zip, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + store_id: Optional[int] = None, + delivery_zip: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Target URL. + + Args: + url (str): Direct URL to any Target page. + store_id (Optional[int]): Set store location. + delivery_zip (Optional[str]): Set shipping destination. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TARGET_URL, + "url": url, + "store_id": store_id, + "delivery_zip": delivery_zip, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/north_american/walmart/__init__.py b/src/oxylabs/sources/north_american/walmart/__init__.py new file mode 100644 index 0000000..92df89f --- /dev/null +++ b/src/oxylabs/sources/north_american/walmart/__init__.py @@ -0,0 +1 @@ +from .walmart import Walmart, WalmartAsync diff --git a/src/oxylabs/sources/north_american/walmart/walmart.py b/src/oxylabs/sources/north_american/walmart/walmart.py new file mode 100644 index 0000000..53fb4fe --- /dev/null +++ b/src/oxylabs/sources/north_american/walmart/walmart.py @@ -0,0 +1,379 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Walmart: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Walmart class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + min_price: Optional[float] = None, + max_price: Optional[float] = None, + sort_by: Optional[str] = None, + delivery_zip: Optional[str] = None, + store_id: Optional[str] = None, + fulfillment_speed: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Walmart search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization for Walmart. + start_page (Optional[int]): Starting page number. + min_price (Optional[float]): Set the minimum price. + max_price (Optional[float]): Set the maximum price. + sort_by (Optional[str]): Select sorting of products. Accepted values: + "price_low", "price_high", "best_seller", "best_match". + delivery_zip (Optional[str]): Set the shipping to location. + store_id (Optional[str]): Set the store location. + fulfillment_speed (Optional[str]): Set the fulfillment speed. Accepted values: + "today", "2_days", "anytime", "tomorrow". + fulfillment_type (Optional[str]): Set the fulfillment type. Accepted values: + "in_store". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): Returns parsed data when set to true. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.WALMART_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "min_price": min_price, + "max_price": max_price, + "sort_by": sort_by, + "delivery_zip": delivery_zip, + "store_id": store_id, + "fulfillment_speed": fulfillment_speed, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + delivery_zip: Optional[str] = None, + store_id: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a Walmart product page for a given product ID. + + Args: + product_id (str): Walmart product ID. + domain (Optional[str]): Domain localization for Walmart. + delivery_zip (Optional[str]): Set the shipping to location. + store_id (Optional[str]): Set the store location. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.WALMART_PRODUCT, + "product_id": product_id, + "domain": domain, + "delivery_zip": delivery_zip, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Walmart URL. + + Args: + url (str): Direct URL to any Walmart page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.WALMART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class WalmartAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the WalmartAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_search( + self, + query: str, + domain: Optional[str] = None, + start_page: Optional[int] = None, + min_price: Optional[float] = None, + max_price: Optional[float] = None, + sort_by: Optional[str] = None, + delivery_zip: Optional[str] = None, + store_id: Optional[str] = None, + fulfillment_speed: Optional[str] = None, + fulfillment_type: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + parse: Optional[bool] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Walmart search results for a given query. + + Args: + query (str): The keyword or phrase to search for. + domain (Optional[str]): Domain localization for Walmart. + start_page (Optional[int]): Starting page number. + min_price (Optional[float]): Set the minimum price. + max_price (Optional[float]): Set the maximum price. + sort_by (Optional[str]): Select sorting of products. Accepted values: + "price_low", "price_high", "best_seller", "best_match". + delivery_zip (Optional[str]): Set the shipping to location. + store_id (Optional[str]): Set the store location. + fulfillment_speed (Optional[str]): Set the fulfillment speed. Accepted values: + "today", "2_days", "anytime", "tomorrow". + fulfillment_type (Optional[str]): Set the fulfillment type. Accepted values: + "in_store". + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + parse (Optional[bool]): Returns parsed data when set to true. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. 
+ poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.WALMART_SEARCH, + "query": query, + "domain": domain, + "start_page": start_page, + "min_price": min_price, + "max_price": max_price, + "sort_by": sort_by, + "delivery_zip": delivery_zip, + "store_id": store_id, + "fulfillment_speed": fulfillment_speed, + "fulfillment_type": fulfillment_type, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + "parse": parse, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_product( + self, + product_id: str, + domain: Optional[str] = None, + delivery_zip: Optional[str] = None, + store_id: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a Walmart product page for a given product ID. + + Args: + product_id (str): Walmart product ID. + domain (Optional[str]): Domain localization for Walmart. + delivery_zip (Optional[str]): Set the shipping to location. + store_id (Optional[str]): Set the store location. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. 
+ callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.WALMART_PRODUCT, + "product_id": product_id, + "domain": domain, + "delivery_zip": delivery_zip, + "store_id": store_id, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Walmart URL. + + Args: + url (str): Direct URL to any Walmart page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. 
+ job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.WALMART_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/perplexity/__init__.py b/src/oxylabs/sources/perplexity/__init__.py new file mode 100644 index 0000000..f23fd6b --- /dev/null +++ b/src/oxylabs/sources/perplexity/__init__.py @@ -0,0 +1 @@ +from .perplexity import Perplexity, PerplexityAsync diff --git a/src/oxylabs/sources/perplexity/perplexity.py b/src/oxylabs/sources/perplexity/perplexity.py new file mode 100644 index 0000000..c5b246f --- /dev/null +++ b/src/oxylabs/sources/perplexity/perplexity.py @@ -0,0 +1,113 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Perplexity: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Perplexity class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape( + self, + prompt: str, + geo_location: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes Perplexity responses for a given prompt. 
+ + Args: + prompt (str): The prompt or question to submit to Perplexity. + geo_location (Optional[str]): Specify a country to send the prompt from. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.PERPLEXITY, + "prompt": prompt, + "geo_location": geo_location, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class PerplexityAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the PerplexityAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape( + self, + prompt: str, + geo_location: Optional[str] = None, + parse: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes Perplexity responses for a given prompt. + + Args: + prompt (str): The prompt or question to submit to Perplexity. + geo_location (Optional[str]): Specify a country to send the prompt from. + parse (Optional[bool]): Returns parsed data when set to true. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. 
+ job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.PERPLEXITY, + "prompt": prompt, + "geo_location": geo_location, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/real_estate/__init__.py b/src/oxylabs/sources/real_estate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/oxylabs/sources/real_estate/airbnb/__init__.py b/src/oxylabs/sources/real_estate/airbnb/__init__.py new file mode 100644 index 0000000..f6c876b --- /dev/null +++ b/src/oxylabs/sources/real_estate/airbnb/__init__.py @@ -0,0 +1 @@ +from .airbnb import Airbnb, AirbnbAsync diff --git a/src/oxylabs/sources/real_estate/airbnb/airbnb.py b/src/oxylabs/sources/real_estate/airbnb/airbnb.py new file mode 100644 index 0000000..7782769 --- /dev/null +++ b/src/oxylabs/sources/real_estate/airbnb/airbnb.py @@ -0,0 +1,198 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Airbnb: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Airbnb class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes an Airbnb property page for a given room ID. + + Args: + product_id (str): The unique numerical ID for an Airbnb property listing. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.AIRBNB_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from an Airbnb URL. + + Args: + url (str): Direct URL to any Airbnb page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.AIRBNB_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class AirbnbAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the AirbnbAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_product( + self, + product_id: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes an Airbnb property page for a given room ID. + + Args: + product_id (str): The unique numerical ID for an Airbnb property listing. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.AIRBNB_PRODUCT, + "product_id": product_id, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from an Airbnb URL. + + Args: + url (str): Direct URL to any Airbnb page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.AIRBNB_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/real_estate/zillow/__init__.py b/src/oxylabs/sources/real_estate/zillow/__init__.py new file mode 100644 index 0000000..9ddeb24 --- /dev/null +++ b/src/oxylabs/sources/real_estate/zillow/__init__.py @@ -0,0 +1 @@ +from .zillow import Zillow, ZillowAsync diff --git a/src/oxylabs/sources/real_estate/zillow/zillow.py b/src/oxylabs/sources/real_estate/zillow/zillow.py new file mode 100644 index 0000000..591a8c4 --- /dev/null +++ b/src/oxylabs/sources/real_estate/zillow/zillow.py @@ -0,0 +1,113 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Zillow: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Zillow class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a Zillow URL. + + Args: + url (str): Direct URL to any Zillow page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. 
+ request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.ZILLOW_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class ZillowAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the ZillowAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. + """ + self._api_instance = api_instance + + async def scrape_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a Zillow URL. + + Args: + url (str): Direct URL to any Zillow page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.ZILLOW_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/tiktok/__init__.py b/src/oxylabs/sources/tiktok/__init__.py new file mode 100644 index 0000000..37432d8 --- /dev/null +++ b/src/oxylabs/sources/tiktok/__init__.py @@ -0,0 +1 @@ +from .tiktok import Tiktok, TiktokAsync diff --git a/src/oxylabs/sources/tiktok/tiktok.py b/src/oxylabs/sources/tiktok/tiktok.py new file mode 100644 index 0000000..0ca86d0 --- /dev/null +++ b/src/oxylabs/sources/tiktok/tiktok.py @@ -0,0 +1,289 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Tiktok: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Tiktok class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_shop_search( + self, + query: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes TikTok Shop search results for a given query. + + Args: + query (str): The keyword or phrase to search for products. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. 
+ request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TIKTOK_SHOP_SEARCH, + "query": query, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_shop_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a TikTok Shop product page for a given product ID. + + Args: + product_id (str): A unique TikTok Shop product ID. + domain (Optional[str]): Domain localization. Currently only "com" (US) is supported. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TIKTOK_SHOP_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_shop_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes content from a TikTok Shop URL. + + Args: + url (str): Direct URL to any TikTok Shop page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.TIKTOK_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class TiktokAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the TiktokAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_shop_search( + self, + query: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes TikTok Shop search results for a given query. + + Args: + query (str): The keyword or phrase to search for products. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TIKTOK_SHOP_SEARCH, + "query": query, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_shop_product( + self, + product_id: str, + domain: Optional[str] = None, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a TikTok Shop product page for a given product ID. + + Args: + product_id (str): A unique TikTok Shop product ID. + domain (Optional[str]): Domain localization. Currently only "com" (US) is supported. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TIKTOK_SHOP_PRODUCT, + "product_id": product_id, + "domain": domain, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_shop_url( + self, + url: str, + user_agent_type: Optional[str] = None, + render: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes content from a TikTok Shop URL. + + Args: + url (str): Direct URL to any TikTok Shop page. + user_agent_type (Optional[str]): Device type and browser. + render (Optional[str]): Enables JavaScript rendering. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.TIKTOK_URL, + "url": url, + "user_agent_type": user_agent_type, + "render": render, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/sources/universal/universal.py b/src/oxylabs/sources/universal/universal.py index f7b1c3b..1752f7e 100644 --- a/src/oxylabs/sources/universal/universal.py +++ b/src/oxylabs/sources/universal/universal.py @@ -24,13 +24,11 @@ def scrape_url( url: str, user_agent_type: Optional[str] = None, geo_location: Optional[str] = None, - locale: Optional[str] = None, render: Optional[str] = None, content_encoding: Optional[str] = None, context: Optional[list] = None, callback_url: Optional[str] = None, parse: Optional[bool] = None, - parser_type: Optional[str] = None, parsing_instructions: Optional[dict] = None, browser_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -43,13 +41,11 @@ def scrape_url( url (str): The URL to be scraped. user_agent_type (Optional[str]): Device type and browser. geo_location (Optional[str]): None, - locale (Optional[str]): Accept-Language header value which changes page web interface language. render (Optional[str]): Enables JavaScript rendering. content_encoding: Add this parameter if you are downloading images. context: Optional[list], callback_url (Optional[str]): URL to your callback endpoint. parse (Optional[bool]): true will return structured data. - parser_type: Set the value to ecommerce_product to access our AI-powered Adaptive Parser. parsing_instructions (Optional[dict]): Instructions for parsing the results. browser_instructions (Optional[dict]): Browser instructions that are executed when rendering JavaScript. 
request_timeout (int | 165, optional): The interval in seconds for @@ -66,13 +62,11 @@ def scrape_url( "url": url, "user_agent_type": user_agent_type, "geo_location": geo_location, - "locale": locale, "render": render, "content_encoding": content_encoding, "context": context, "callback_url": callback_url, "parse": parse, - "parser_type": parser_type, "parsing_instructions": parsing_instructions, "browser_instructions": browser_instructions, **kwargs, @@ -96,13 +90,11 @@ async def scrape_url( url: str, user_agent_type: Optional[str] = None, geo_location: Optional[str] = None, - locale: Optional[str] = None, render: Optional[str] = None, content_encoding: Optional[str] = None, context: Optional[list] = None, callback_url: Optional[str] = None, parse: Optional[bool] = None, - parser_type: Optional[str] = None, parsing_instructions: Optional[dict] = None, browser_instructions: Optional[dict] = None, request_timeout: Optional[int] = 165, @@ -117,13 +109,11 @@ async def scrape_url( url (str): The URL to be scraped. user_agent_type (Optional[str]): Device type and browser. geo_location (Optional[str]): None, - locale (Optional[str]): Accept-Language header value which changes page web interface language. render (Optional[str]): Enables JavaScript rendering. content_encoding: Add this parameter if you are downloading images. context: Optional[list], callback_url (Optional[str]): URL to your callback endpoint. parse (Optional[bool]): true will return structured data. - parser_type: Set the value to ecommerce_product to access our AI-powered Adaptive Parser. parsing_instructions (Optional[dict]): Instructions for parsing the results. browser_instructions (Optional[dict]): Browser instructions that are executed when rendering JavaScript. 
request_timeout (int | 165, optional): The interval in seconds for @@ -149,13 +139,11 @@ async def scrape_url( "url": url, "user_agent_type": user_agent_type, "geo_location": geo_location, - "locale": locale, "render": render, "content_encoding": content_encoding, "context": context, "callback_url": callback_url, "parse": parse, - "parser_type": parser_type, "parsing_instructions": parsing_instructions, "browser_instructions": browser_instructions, **kwargs, diff --git a/src/oxylabs/sources/youtube/__init__.py b/src/oxylabs/sources/youtube/__init__.py new file mode 100644 index 0000000..1885db6 --- /dev/null +++ b/src/oxylabs/sources/youtube/__init__.py @@ -0,0 +1 @@ +from .youtube import Youtube, YoutubeAsync diff --git a/src/oxylabs/sources/youtube/youtube.py b/src/oxylabs/sources/youtube/youtube.py new file mode 100644 index 0000000..1130b6f --- /dev/null +++ b/src/oxylabs/sources/youtube/youtube.py @@ -0,0 +1,910 @@ +from typing import Optional + +from oxylabs.internal.api import RealtimeAPI, AsyncAPI +from oxylabs.sources.response import Response +from oxylabs.utils.types import source +from oxylabs.utils.utils import prepare_config + + +class Youtube: + def __init__(self, api_instance: RealtimeAPI) -> None: + """ + Initializes an instance of the Youtube class. + + Args: + api_instance: An instance of the RealtimeAPI class used for making requests. + """ + self._api_instance = api_instance + + def scrape_transcript( + self, + query: str, + context: Optional[list] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes a YouTube video transcript for a given query. + + Args: + query (str): A YouTube video ID. + context (Optional[list]): Context parameters (language_code, transcript_origin). + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. 
+ Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_TRANSCRIPT, + "query": query, + "context": context, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_search( + self, + query: str, + upload_date: Optional[str] = None, + type: Optional[str] = None, + duration: Optional[str] = None, + sort_by: Optional[str] = None, + filter_360: Optional[bool] = None, + filter_3d: Optional[bool] = None, + filter_4k: Optional[bool] = None, + creative_commons: Optional[bool] = None, + hd: Optional[bool] = None, + hdr: Optional[bool] = None, + live: Optional[bool] = None, + location: Optional[bool] = None, + purchased: Optional[bool] = None, + subtitles: Optional[bool] = None, + vr180: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes YouTube search results for a given query. Returns up to 20 results. + + Args: + query (str): The search term. + upload_date (Optional[str]): Filter by upload date. Accepted values: + "today", "last_hour", "this_week", "this_month", "this_year". + type (Optional[str]): Filter by type. Accepted values: + "video", "channel", "playlist", "movie". + duration (Optional[str]): Filter by duration. Accepted values: + "<4", "4-20", ">20". + sort_by (Optional[str]): Sort results. Accepted values: + "rating", "relevance", "view_count", "upload_date". + filter_360 (Optional[bool]): Returns 360-degree videos. + filter_3d (Optional[bool]): Returns 3D videos. + filter_4k (Optional[bool]): Returns 4K videos. + creative_commons (Optional[bool]): Only Creative Commons licensed videos. + hd (Optional[bool]): Returns HD videos. + hdr (Optional[bool]): Returns HDR videos. + live (Optional[bool]): Returns live streams. 
+ location (Optional[bool]): Returns videos with location info. + purchased (Optional[bool]): Returns purchased content. + subtitles (Optional[bool]): Returns videos with subtitles/CC. + vr180 (Optional[bool]): Returns VR180 videos. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_SEARCH, + "query": query, + "upload_date": upload_date, + "type": type, + "duration": duration, + "sort_by": sort_by, + "360": filter_360, + "3d": filter_3d, + "4k": filter_4k, + "creative_commons": creative_commons, + "hd": hd, + "hdr": hdr, + "live": live, + "location": location, + "purchased": purchased, + "subtitles": subtitles, + "vr180": vr180, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_search_max( + self, + query: str, + upload_date: Optional[str] = None, + type: Optional[str] = None, + duration: Optional[str] = None, + sort_by: Optional[str] = None, + filter_360: Optional[bool] = None, + filter_3d: Optional[bool] = None, + filter_4k: Optional[bool] = None, + creative_commons: Optional[bool] = None, + hd: Optional[bool] = None, + hdr: Optional[bool] = None, + live: Optional[bool] = None, + location: Optional[bool] = None, + purchased: Optional[bool] = None, + subtitles: Optional[bool] = None, + vr180: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes YouTube search results for a given query. Returns up to 700 results. + + Args: + query (str): The search term. + upload_date (Optional[str]): Filter by upload date. 
Accepted values: + "today", "last_hour", "this_week", "this_month", "this_year". + type (Optional[str]): Filter by type. Accepted values: + "video", "channel", "playlist", "movie". + duration (Optional[str]): Filter by duration. Accepted values: + "<4", "4-20", ">20". + sort_by (Optional[str]): Sort results. Accepted values: + "rating", "relevance", "view_count", "upload_date". + filter_360 (Optional[bool]): Returns 360-degree videos. + filter_3d (Optional[bool]): Returns 3D videos. + filter_4k (Optional[bool]): Returns 4K videos. + creative_commons (Optional[bool]): Only Creative Commons licensed videos. + hd (Optional[bool]): Returns HD videos. + hdr (Optional[bool]): Returns HDR videos. + live (Optional[bool]): Returns live streams. + location (Optional[bool]): Returns videos with location info. + purchased (Optional[bool]): Returns purchased content. + subtitles (Optional[bool]): Returns videos with subtitles/CC. + vr180 (Optional[bool]): Returns VR180 videos. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_SEARCH_MAX, + "query": query, + "upload_date": upload_date, + "type": type, + "duration": duration, + "sort_by": sort_by, + "360": filter_360, + "3d": filter_3d, + "4k": filter_4k, + "creative_commons": creative_commons, + "hd": hd, + "hdr": hdr, + "live": live, + "location": location, + "purchased": purchased, + "subtitles": subtitles, + "vr180": vr180, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_metadata( + self, + query: str, + parse: Optional[bool] = True, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes YouTube video metadata for a given video ID. + + Args: + query (str): A YouTube video ID. + parse (Optional[bool]): Returns parsed data. Required for this source, + defaults to True. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_METADATA, + "query": query, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_channel( + self, + channel_handle: str, + parse: Optional[bool] = True, + limit: Optional[int] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes YouTube channel information for a given channel handle. + + Args: + channel_handle (str): YouTube channel handle (e.g. "@Oxylabs"). 
+ parse (Optional[bool]): Returns parsed data. Defaults to True. + limit (Optional[int]): Limits number of videos in the videos array. + Defaults to 20. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_CHANNEL, + "channel_handle": channel_handle, + "parse": parse, + "limit": limit, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_subtitles( + self, + query: str, + context: Optional[list] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes YouTube video subtitles for a given video ID. + + Args: + query (str): A YouTube video ID. + context (Optional[list]): Context parameters (language_code, subtitle_origin). + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_SUBTITLES, + "query": query, + "context": context, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_video_trainability( + self, + video_id: str, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Checks AI training eligibility for a YouTube video. 
+ + Args: + video_id (str): A YouTube video ID. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_VIDEO_TRAINABILITY, + "video_id": video_id, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + def scrape_autocomplete( + self, + query: str, + location: Optional[str] = None, + language: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + **kwargs + ) -> Response: + """ + Scrapes YouTube autocomplete suggestions for a given query. + + Args: + query (str): The search term for keyword suggestions. + location (Optional[str]): 2-letter country code. Defaults to "US". + language (Optional[str]): Language code. Defaults to "en". + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config(request_timeout=request_timeout) + payload = { + "source": source.YOUTUBE_AUTOCOMPLETE, + "query": query, + "location": location, + "language": language, + "callback_url": callback_url, + **kwargs, + } + api_response = self._api_instance.get_response(payload, config) + return Response(api_response) + + +class YoutubeAsync: + def __init__(self, api_instance: AsyncAPI) -> None: + """ + Initializes an instance of the YoutubeAsync class. + + Args: + api_instance: An instance of the AsyncAPI class used for making requests. 
+ """ + self._api_instance = api_instance + + async def scrape_transcript( + self, + query: str, + context: Optional[list] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes a YouTube video transcript for a given query. + + Args: + query (str): A YouTube video ID. + context (Optional[list]): Context parameters (language_code, transcript_origin). + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_TRANSCRIPT, + "query": query, + "context": context, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_search( + self, + query: str, + upload_date: Optional[str] = None, + type: Optional[str] = None, + duration: Optional[str] = None, + sort_by: Optional[str] = None, + filter_360: Optional[bool] = None, + filter_3d: Optional[bool] = None, + filter_4k: Optional[bool] = None, + creative_commons: Optional[bool] = None, + hd: Optional[bool] = None, + hdr: Optional[bool] = None, + live: Optional[bool] = None, + location: Optional[bool] = None, + purchased: Optional[bool] = None, + subtitles: Optional[bool] = None, + vr180: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes YouTube search results for a given query. + Returns up to 20 results. + + Args: + query (str): The search term. + upload_date (Optional[str]): Filter by upload date. Accepted values: + "today", "last_hour", "this_week", "this_month", "this_year". + type (Optional[str]): Filter by type. Accepted values: + "video", "channel", "playlist", "movie". + duration (Optional[str]): Filter by duration. Accepted values: + "<4", "4-20", ">20". + sort_by (Optional[str]): Sort results. Accepted values: + "rating", "relevance", "view_count", "upload_date". + filter_360 (Optional[bool]): Returns 360-degree videos. + filter_3d (Optional[bool]): Returns 3D videos. + filter_4k (Optional[bool]): Returns 4K videos. + creative_commons (Optional[bool]): Only Creative Commons licensed videos. 
+ hd (Optional[bool]): Returns HD videos. + hdr (Optional[bool]): Returns HDR videos. + live (Optional[bool]): Returns live streams. + location (Optional[bool]): Returns videos with location info. + purchased (Optional[bool]): Returns purchased content. + subtitles (Optional[bool]): Returns videos with subtitles/CC. + vr180 (Optional[bool]): Returns VR180 videos. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_SEARCH, + "query": query, + "upload_date": upload_date, + "type": type, + "duration": duration, + "sort_by": sort_by, + "360": filter_360, + "3d": filter_3d, + "4k": filter_4k, + "creative_commons": creative_commons, + "hd": hd, + "hdr": hdr, + "live": live, + "location": location, + "purchased": purchased, + "subtitles": subtitles, + "vr180": vr180, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_search_max( + self, + query: str, + upload_date: Optional[str] = None, + type: Optional[str] = None, + duration: Optional[str] = None, + sort_by: Optional[str] = None, + filter_360: Optional[bool] = None, + filter_3d: Optional[bool] = None, + filter_4k: Optional[bool] = None, + creative_commons: Optional[bool] = None, + hd: Optional[bool] = None, + hdr: Optional[bool] = None, + 
live: Optional[bool] = None, + location: Optional[bool] = None, + purchased: Optional[bool] = None, + subtitles: Optional[bool] = None, + vr180: Optional[bool] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes YouTube search results for a given query. + Returns up to 700 results. + + Args: + query (str): The search term. + upload_date (Optional[str]): Filter by upload date. Accepted values: + "today", "last_hour", "this_week", "this_month", "this_year". + type (Optional[str]): Filter by type. Accepted values: + "video", "channel", "playlist", "movie". + duration (Optional[str]): Filter by duration. Accepted values: + "<4", "4-20", ">20". + sort_by (Optional[str]): Sort results. Accepted values: + "rating", "relevance", "view_count", "upload_date". + filter_360 (Optional[bool]): Returns 360-degree videos. + filter_3d (Optional[bool]): Returns 3D videos. + filter_4k (Optional[bool]): Returns 4K videos. + creative_commons (Optional[bool]): Only Creative Commons licensed videos. + hd (Optional[bool]): Returns HD videos. + hdr (Optional[bool]): Returns HDR videos. + live (Optional[bool]): Returns live streams. + location (Optional[bool]): Returns videos with location info. + purchased (Optional[bool]): Returns purchased content. + subtitles (Optional[bool]): Returns videos with subtitles/CC. + vr180 (Optional[bool]): Returns VR180 videos. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. 
+ + Returns: + Response: The response from the server after the job is completed. + """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_SEARCH_MAX, + "query": query, + "upload_date": upload_date, + "type": type, + "duration": duration, + "sort_by": sort_by, + "360": filter_360, + "3d": filter_3d, + "4k": filter_4k, + "creative_commons": creative_commons, + "hd": hd, + "hdr": hdr, + "live": live, + "location": location, + "purchased": purchased, + "subtitles": subtitles, + "vr180": vr180, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_metadata( + self, + query: str, + parse: Optional[bool] = True, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes YouTube video metadata for a given video ID. + + Args: + query (str): A YouTube video ID. + parse (Optional[bool]): Returns parsed data. Required for this source, + defaults to True. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_METADATA, + "query": query, + "parse": parse, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_channel( + self, + channel_handle: str, + parse: Optional[bool] = True, + limit: Optional[int] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes YouTube channel information for a given channel handle. + + Args: + channel_handle (str): YouTube channel handle (e.g. "@Oxylabs"). + parse (Optional[bool]): Returns parsed data. Defaults to True. + limit (Optional[int]): Limits number of videos in the videos array. + Defaults to 20. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_CHANNEL, + "channel_handle": channel_handle, + "parse": parse, + "limit": limit, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_subtitles( + self, + query: str, + context: Optional[list] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes YouTube video subtitles for a given video ID. + + Args: + query (str): A YouTube video ID. + context (Optional[list]): Context parameters (language_code, subtitle_origin). + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_SUBTITLES, + "query": query, + "context": context, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_video_trainability( + self, + video_id: str, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously checks AI training eligibility for a YouTube video. + + Args: + video_id (str): A YouTube video ID. + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_VIDEO_TRAINABILITY, + "video_id": video_id, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_download( + self, + query: str, + storage_type: str, + storage_url: str, + context: Optional[list] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously downloads YouTube video/audio content to cloud storage. + + This method is only available via Push-Pull (async) integration. + + Args: + query (str): A YouTube video ID. + storage_type (str): Cloud storage type. Accepted values: + "gcs", "s3", "s3_compatible". + storage_url (str): Bucket name (AWS S3) or URL (other S3-compatible storage). + context (Optional[list]): Context parameters (download_type, video_quality). + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_DOWNLOAD, + "query": query, + "storage_type": storage_type, + "storage_url": storage_url, + "context": context, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) + + async def scrape_autocomplete( + self, + query: str, + location: Optional[str] = None, + language: Optional[str] = None, + callback_url: Optional[str] = None, + request_timeout: Optional[int] = 165, + job_completion_timeout: Optional[int] = None, + poll_interval: Optional[int] = None, + **kwargs + ) -> Response: + """ + Asynchronously scrapes YouTube autocomplete suggestions for a given query. + + Args: + query (str): The search term for keyword suggestions. + location (Optional[str]): 2-letter country code. Defaults to "US". + language (Optional[str]): Language code. Defaults to "en". + callback_url (Optional[str]): URL to your callback endpoint. + request_timeout (int | 165, optional): The interval in seconds for + the request to time out if no response is returned. + Defaults to 165. + poll_interval (Optional[int]): The interval in seconds to poll + the server for a response. + job_completion_timeout (Optional[int]): The interval in + seconds for the job to time out if no response is returned. + + Returns: + Response: The response from the server after the job is completed. 
+ """ + + config = prepare_config( + request_timeout=request_timeout, + poll_interval=poll_interval, + job_completion_timeout=job_completion_timeout, + async_integration=True, + ) + payload = { + "source": source.YOUTUBE_AUTOCOMPLETE, + "query": query, + "location": location, + "language": language, + "callback_url": callback_url, + **kwargs, + } + api_response = await self._api_instance.get_response(payload, config) + return Response(api_response) diff --git a/src/oxylabs/utils/types/source.py b/src/oxylabs/utils/types/source.py index 6481568..d2fbf74 100644 --- a/src/oxylabs/utils/types/source.py +++ b/src/oxylabs/utils/types/source.py @@ -7,6 +7,7 @@ GOOGLE_TRENDS_EXPLORE = "google_trends_explore" GOOGLE_MAPS = "google_maps" GOOGLE_LENS = "google_lens" +GOOGLE_AI_MODE = "google_ai_mode" BING_URL = "bing" BING_SEARCH = "bing_search" @@ -20,7 +21,6 @@ GOOGLE_SHOPPING_URL = "google_shopping" GOOGLE_SHOPPING_SEARCH = "google_shopping_search" GOOGLE_SHOPPING_PRODUCT = "google_shopping_product" -GOOGLE_SHOPPING_PRICING = "google_shopping_pricing" WAYFAIR = "wayfair" WAYFAIR_SEARCH = "wayfair_search" @@ -40,4 +40,140 @@ KROGER_PRODUCT = "kroger_product" KROGER_SEARCH = "kroger_search" +STAPLES_SEARCH = "staples_search" + +WALMART_URL = "walmart" +WALMART_SEARCH = "walmart_search" +WALMART_PRODUCT = "walmart_product" + +BESTBUY_SEARCH = "bestbuy_search" +BESTBUY_PRODUCT = "bestbuy_product" + +BEDBATHANDBEYOND_URL = "bedbathandbeyond" +BEDBATHANDBEYOND_SEARCH = "bedbathandbeyond_search" +BEDBATHANDBEYOND_PRODUCT = "bedbathandbeyond_product" + +BODEGAAURRERA_URL = "bodegaaurrera" +BODEGAAURRERA_SEARCH = "bodegaaurrera_search" +BODEGAAURRERA_PRODUCT = "bodegaaurrera_product" + +COSTCO_URL = "costco" + +COSTCO_SEARCH = "costco_search" +COSTCO_PRODUCT = "costco_product" + +GRAINGER_URL = "grainger" +GRAINGER_SEARCH = "grainger_search" +GRAINGER_PRODUCT = "grainger_product" + +INSTACART_URL = "instacart" +INSTACART_SEARCH = "instacart_search" +INSTACART_PRODUCT = 
"instacart_product" + +LOWES_URL = "lowes" +LOWES_SEARCH = "lowes_search" +LOWES_PRODUCT = "lowes_product" + +MENARDS_URL = "menards" +MENARDS_SEARCH = "menards_search" +MENARDS_PRODUCT = "menards_product" + +PETCO_URL = "petco" +PETCO_SEARCH = "petco_search" + +PUBLIX_URL = "publix" +PUBLIX_SEARCH = "publix_search" +PUBLIX_PRODUCT = "publix_product" + +TARGET_URL = "target" +TARGET_SEARCH = "target_search" +TARGET_PRODUCT = "target_product" +TARGET_CATEGORY = "target_category" + +ALLEGRO_SEARCH = "allegro_search" +ALLEGRO_PRODUCT = "allegro_product" + +CDISCOUNT_URL = "cdiscount" +CDISCOUNT_SEARCH = "cdiscount_search" +CDISCOUNT_PRODUCT = "cdiscount_product" + +IDEALO_SEARCH = "idealo_search" + +MEDIAMARKT_URL = "mediamarkt" +MEDIAMARKT_SEARCH = "mediamarkt_search" +MEDIAMARKT_PRODUCT = "mediamarkt_product" + +ALIBABA_URL = "alibaba" +ALIBABA_SEARCH = "alibaba_search" +ALIBABA_PRODUCT = "alibaba_product" + +ALIEXPRESS_URL = "aliexpress" +ALIEXPRESS_SEARCH = "aliexpress_search" +ALIEXPRESS_PRODUCT = "aliexpress_product" + +FLIPKART_URL = "flipkart" +FLIPKART_SEARCH = "flipkart_search" +FLIPKART_PRODUCT = "flipkart_product" + +INDIAMART_URL = "indiamart" +INDIAMART_SEARCH = "indiamart_search" +INDIAMART_PRODUCT = "indiamart_product" + +LAZADA_URL = "lazada" +LAZADA_SEARCH = "lazada_search" +LAZADA_PRODUCT = "lazada_product" + +RAKUTEN_URL = "rakuten" +RAKUTEN_SEARCH = "rakuten_search" + +TOKOPEDIA_URL = "tokopedia" +TOKOPEDIA_SEARCH = "tokopedia_search" + +SHEIN_SEARCH = "shein_search" +AVNET_SEARCH = "avnet_search" +DCARD_SEARCH = "dcard_search" + +MERCADOLIBRE_URL = "mercadolibre" +MERCADOLIBRE_SEARCH = "mercadolibre_search" +MERCADOLIBRE_PRODUCT = "mercadolibre_product" + +MERCADOLIVRE_SEARCH = "mercadolivre_search" +MERCADOLIVRE_PRODUCT = "mercadolivre_product" + +MAGAZINELUIZA_URL = "magazineluiza" +MAGAZINELUIZA_SEARCH = "magazineluiza_search" +MAGAZINELUIZA_PRODUCT = "magazineluiza_product" + +FALABELLA_URL = "falabella" +FALABELLA_SEARCH = "falabella_search" 
+FALABELLA_PRODUCT = "falabella_product" + +AIRBNB_URL = "airbnb" +AIRBNB_PRODUCT = "airbnb_product" + +ZILLOW_URL = "zillow" + +CHATGPT = "chatgpt" +PERPLEXITY = "perplexity" + +TIKTOK_URL = "tiktok" +TIKTOK_SHOP_SEARCH = "tiktok_shop_search" +TIKTOK_SHOP_PRODUCT = "tiktok_shop_product" + +EBAY_URL = "ebay" +EBAY_SEARCH = "ebay_search" +EBAY_PRODUCT = "ebay_product" + +ETSY_URL = "etsy" +ETSY_SEARCH = "etsy_search" +ETSY_PRODUCT = "etsy_product" + YOUTUBE_TRANSCRIPT = "youtube_transcript" +YOUTUBE_SEARCH = "youtube_search" +YOUTUBE_SEARCH_MAX = "youtube_search_max" +YOUTUBE_METADATA = "youtube_metadata" +YOUTUBE_CHANNEL = "youtube_channel" +YOUTUBE_SUBTITLES = "youtube_subtitles" +YOUTUBE_DOWNLOAD = "youtube_download" +YOUTUBE_VIDEO_TRAINABILITY = "youtube_video_trainability" +YOUTUBE_AUTOCOMPLETE = "youtube_autocomplete" diff --git a/src/oxylabs/utils/utils.py b/src/oxylabs/utils/utils.py index f453087..18247b5 100644 --- a/src/oxylabs/utils/utils.py +++ b/src/oxylabs/utils/utils.py @@ -231,8 +231,8 @@ def validate_fn_args(function: str, args: Any) -> None: fn_name.XPATH_ONE: validate_string_array, fn_name.CSS: validate_string_array, fn_name.CSS_ONE: validate_string_array, - fn_name.AMOUNT_FROM_STRING: validate_string, - fn_name.AMOUNT_RANGE_FROM_STRING: validate_string, + fn_name.AMOUNT_FROM_STRING: validate_optional_string, + fn_name.AMOUNT_RANGE_FROM_STRING: validate_optional_string, fn_name.REGEX_FIND_ALL: validate_string, fn_name.JOIN: validate_optional_string, fn_name.REGEX_SEARCH: validate_list_string_optional_int, diff --git a/tests/.DS_Store b/tests/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..818e1bbdda80186b80595cf7245c4ea1aa80b92e GIT binary patch literal 6148 zcmeHK!A`?440X05I=Jk}F~6`s2vfPh3GIx8wxMxY)rx5XZuu6Di2vX#;KqOOY=^P7 zOxzHfY{_}4V<+xQR5wKA#_RcrXh=jkl(9EJ^Mmj>Ye#y?!h=rccw8<7J4ydL(s_21Q`w}!9?$PrUwO3VDXZ^Xc6&AQIkJPx8)9M%7z2CH0BSZ%eynJ%F<=ZB z0~-eT`{1FBAz@UMPX`*g0su3ZMbMXjI-q9*FeHqMut1!K0yWg7D~8i>*nRQ~38SKh 
zlarM(Pg>cf8;XX7V2KxFqTQn literal 0 HcmV?d00001 diff --git a/tests/sources/airbnb/__init__.py b/tests/sources/airbnb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/airbnb/test_airbnb.py b/tests/sources/airbnb/test_airbnb.py new file mode 100644 index 0000000..765e6ac --- /dev/null +++ b/tests/sources/airbnb/test_airbnb.py @@ -0,0 +1,27 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestAirbnbProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.airbnb._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.airbnb.scrape_product("11984394") + + self.assertEqual(captured["source"], "airbnb_product") + self.assertEqual(captured["product_id"], "11984394") + + +class TestAirbnbUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.airbnb._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.airbnb.scrape_url("https://www.airbnb.com/rooms/11984394") + + self.assertEqual(captured["source"], "airbnb") diff --git a/tests/sources/alibaba/__init__.py b/tests/sources/alibaba/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/alibaba/test_alibaba.py b/tests/sources/alibaba/test_alibaba.py new file mode 100644 index 0000000..9e9e162 --- /dev/null +++ b/tests/sources/alibaba/test_alibaba.py @@ -0,0 +1,48 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestAlibabaSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.alibaba._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + 
client.alibaba.scrape_search("electronics") + + self.assertEqual(captured["source"], "alibaba_search") + + def test_search_start_page(self): + client = RealtimeClient('user', 'pass') + api = client.alibaba._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.alibaba.scrape_search("electronics", start_page=2) + + self.assertEqual(captured["start_page"], 2) + + +class TestAlibabaProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.alibaba._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.alibaba.scrape_product("1234567890123") + + self.assertEqual(captured["source"], "alibaba_product") + + +class TestAlibabaUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.alibaba._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.alibaba.scrape_url("https://www.alibaba.com/") + + self.assertEqual(captured["source"], "alibaba") diff --git a/tests/sources/aliexpress/__init__.py b/tests/sources/aliexpress/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/aliexpress/test_aliexpress.py b/tests/sources/aliexpress/test_aliexpress.py new file mode 100644 index 0000000..932d578 --- /dev/null +++ b/tests/sources/aliexpress/test_aliexpress.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestAliexpressSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.aliexpress._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.aliexpress.scrape_search("headphones") + 
+ self.assertEqual(captured["source"], "aliexpress_search") + + +class TestAliexpressProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.aliexpress._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.aliexpress.scrape_product("123456") + + self.assertEqual(captured["source"], "aliexpress_product") + + +class TestAliexpressUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.aliexpress._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.aliexpress.scrape_url("https://www.aliexpress.com/") + + self.assertEqual(captured["source"], "aliexpress") diff --git a/tests/sources/allegro/__init__.py b/tests/sources/allegro/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/allegro/test_allegro.py b/tests/sources/allegro/test_allegro.py new file mode 100644 index 0000000..3e12612 --- /dev/null +++ b/tests/sources/allegro/test_allegro.py @@ -0,0 +1,37 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestAllegroSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.allegro._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.allegro.scrape_search("laptop") + + self.assertEqual(captured["source"], "allegro_search") + + def test_search_localization(self): + client = RealtimeClient('user', 'pass') + api = client.allegro._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.allegro.scrape_search("laptop", delivery_time="one_day", shipping_from="poland") + + 
self.assertEqual(captured["delivery_time"], "one_day") + self.assertEqual(captured["shipping_from"], "poland") + + +class TestAllegroProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.allegro._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.allegro.scrape_product("12345678901") + + self.assertEqual(captured["source"], "allegro_product") diff --git a/tests/sources/amazon/test_amazon.py b/tests/sources/amazon/test_amazon.py new file mode 100644 index 0000000..c22357a --- /dev/null +++ b/tests/sources/amazon/test_amazon.py @@ -0,0 +1,149 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestAmazonSearchParams(unittest.TestCase): + """Tests that new parameters flow through to the payload for scrape_search.""" + + def test_search_locale(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_search("laptop", locale="en-us") + + self.assertEqual(captured["locale"], "en-us") + + def test_search_sort_by(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_search("laptop", sort_by="price-asc-rank") + + self.assertEqual(captured["sort_by"], "price-asc-rank") + + def test_search_refinements(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_search("laptop", refinements="p_n_condition-type:New") + + self.assertEqual(captured["refinements"], "p_n_condition-type:New") + 
+ +class TestAmazonUrlParams(unittest.TestCase): + """Tests that new parameters flow through to the payload for scrape_url.""" + + def test_url_geo_location(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_url("https://www.amazon.com/dp/B09V3KXJPB", geo_location="United States") + + self.assertEqual(captured["geo_location"], "United States") + + def test_url_locale(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_url("https://www.amazon.com/dp/B09V3KXJPB", locale="en-us") + + self.assertEqual(captured["locale"], "en-us") + + def test_url_context(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "currency", "value": "USD"}] + client.amazon.scrape_url("https://www.amazon.com/dp/B09V3KXJPB", context=ctx) + + self.assertEqual(captured["context"], ctx) + + +class TestAmazonProductParams(unittest.TestCase): + """Tests that new parameters flow through to the payload for scrape_product.""" + + def test_product_locale(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_product("B09V3KXJPB", locale="en-us") + + self.assertEqual(captured["locale"], "en-us") + + +class TestAmazonPricingParams(unittest.TestCase): + """Tests that new parameters flow through to the payload for scrape_pricing.""" + + def test_pricing_locale(self): + client = RealtimeClient('user', 'pass') + api = 
client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_pricing("B09V3KXJPB", locale="en-us") + + self.assertEqual(captured["locale"], "en-us") + + def test_pricing_context(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "currency", "value": "USD"}] + client.amazon.scrape_pricing("B09V3KXJPB", context=ctx) + + self.assertEqual(captured["context"], ctx) + + +class TestAmazonBestsellersParams(unittest.TestCase): + """Tests that new parameters flow through to the payload for scrape_bestsellers.""" + + def test_bestsellers_locale(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_bestsellers("11091801", locale="en-us") + + self.assertEqual(captured["locale"], "en-us") + + def test_bestsellers_context(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "currency", "value": "USD"}] + client.amazon.scrape_bestsellers("11091801", context=ctx) + + self.assertEqual(captured["context"], ctx) + + +class TestAmazonSellersParams(unittest.TestCase): + """Tests that new parameters flow through to the payload for scrape_sellers.""" + + def test_sellers_locale(self): + client = RealtimeClient('user', 'pass') + api = client.amazon._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.amazon.scrape_sellers("ATVPDKIKX0DER", locale="en-us") + + 
self.assertEqual(captured["locale"], "en-us") diff --git a/tests/sources/avnet/__init__.py b/tests/sources/avnet/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/avnet/test_avnet.py b/tests/sources/avnet/test_avnet.py new file mode 100644 index 0000000..656d1f6 --- /dev/null +++ b/tests/sources/avnet/test_avnet.py @@ -0,0 +1,27 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestAvnetSearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.avnet._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.avnet.scrape_search("case") + + self.assertEqual(captured["source"], "avnet_search") + self.assertEqual(captured["query"], "case") + + def test_search_start_page(self): + client = RealtimeClient('user', 'pass') + api = client.avnet._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.avnet.scrape_search("case", start_page=3) + + self.assertEqual(captured["start_page"], 3) diff --git a/tests/sources/bedbathandbeyond/__init__.py b/tests/sources/bedbathandbeyond/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/bedbathandbeyond/test_bedbathandbeyond.py b/tests/sources/bedbathandbeyond/test_bedbathandbeyond.py new file mode 100644 index 0000000..80d6e4f --- /dev/null +++ b/tests/sources/bedbathandbeyond/test_bedbathandbeyond.py @@ -0,0 +1,41 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestBedbathandbeyondSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.bedbathandbeyond._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: 
(captured.update(payload) or {"mock": True}) + + client.bedbathandbeyond.scrape_search("table") + + self.assertEqual(captured["source"], "bedbathandbeyond_search") + self.assertEqual(captured["query"], "table") + + +class TestBedbathandbeyondProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.bedbathandbeyond._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bedbathandbeyond.scrape_product("12345678") + + self.assertEqual(captured["source"], "bedbathandbeyond_product") + self.assertEqual(captured["product_id"], "12345678") + + +class TestBedbathandbeyondUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.bedbathandbeyond._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bedbathandbeyond.scrape_url("https://www.bedbathandbeyond.com/store/product/123") + + self.assertEqual(captured["source"], "bedbathandbeyond") + self.assertEqual(captured["url"], "https://www.bedbathandbeyond.com/store/product/123") diff --git a/tests/sources/bestbuy/__init__.py b/tests/sources/bestbuy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/bestbuy/test_bestbuy.py b/tests/sources/bestbuy/test_bestbuy.py new file mode 100644 index 0000000..005aab9 --- /dev/null +++ b/tests/sources/bestbuy/test_bestbuy.py @@ -0,0 +1,53 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestBestbuySearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.bestbuy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + 
client.bestbuy.scrape_search("laptop") + + self.assertEqual(captured["source"], "bestbuy_search") + self.assertEqual(captured["query"], "laptop") + + def test_search_fulfillment(self): + client = RealtimeClient('user', 'pass') + api = client.bestbuy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bestbuy.scrape_search("laptop", store_id=123, fulfillment_type="pickup") + + self.assertEqual(captured["store_id"], 123) + self.assertEqual(captured["fulfillment_type"], "pickup") + + +class TestBestbuyProductSync(unittest.TestCase): + """Tests that scrape_product parameters flow through to the payload.""" + + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.bestbuy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bestbuy.scrape_product("6525410") + + self.assertEqual(captured["source"], "bestbuy_product") + self.assertEqual(captured["product_id"], "6525410") + + def test_product_parse(self): + client = RealtimeClient('user', 'pass') + api = client.bestbuy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bestbuy.scrape_product("6525410", parse=True) + + self.assertEqual(captured["parse"], True) diff --git a/tests/sources/bodegaaurrera/__init__.py b/tests/sources/bodegaaurrera/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/bodegaaurrera/test_bodegaaurrera.py b/tests/sources/bodegaaurrera/test_bodegaaurrera.py new file mode 100644 index 0000000..6b0fb58 --- /dev/null +++ b/tests/sources/bodegaaurrera/test_bodegaaurrera.py @@ -0,0 +1,49 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestBodegaaurrerapSearchSync(unittest.TestCase): + def test_search_source(self): + client = 
RealtimeClient('user', 'pass') + api = client.bodegaaurrera._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bodegaaurrera.scrape_search("arroz") + + self.assertEqual(captured["source"], "bodegaaurrera_search") + self.assertEqual(captured["query"], "arroz") + + def test_search_subdomain(self): + client = RealtimeClient('user', 'pass') + api = client.bodegaaurrera._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bodegaaurrera.scrape_search("arroz", subdomain="despensa") + + self.assertEqual(captured["subdomain"], "despensa") + + +class TestBodegaaurreraProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.bodegaaurrera._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bodegaaurrera.scrape_product("123456") + + self.assertEqual(captured["source"], "bodegaaurrera_product") + + +class TestBodegaaurreraUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.bodegaaurrera._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.bodegaaurrera.scrape_url("https://www.bodegaaurrera.com.mx/") + + self.assertEqual(captured["source"], "bodegaaurrera") diff --git a/tests/sources/cdiscount/__init__.py b/tests/sources/cdiscount/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/cdiscount/test_cdiscount.py b/tests/sources/cdiscount/test_cdiscount.py new file mode 100644 index 0000000..8aa7f45 --- /dev/null +++ b/tests/sources/cdiscount/test_cdiscount.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class 
TestCdiscountSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.cdiscount._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.cdiscount.scrape_search("tv") + + self.assertEqual(captured["source"], "cdiscount_search") + + +class TestCdiscountProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.cdiscount._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.cdiscount.scrape_product("123456") + + self.assertEqual(captured["source"], "cdiscount_product") + + +class TestCdiscountUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.cdiscount._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.cdiscount.scrape_url("https://www.cdiscount.com/") + + self.assertEqual(captured["source"], "cdiscount") diff --git a/tests/sources/chatgpt/__init__.py b/tests/sources/chatgpt/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/chatgpt/test_chatgpt.py b/tests/sources/chatgpt/test_chatgpt.py new file mode 100644 index 0000000..2a782b8 --- /dev/null +++ b/tests/sources/chatgpt/test_chatgpt.py @@ -0,0 +1,47 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestChatgptSync(unittest.TestCase): + """Tests that scrape parameters flow through to the payload.""" + + def test_scrape_source(self): + client = RealtimeClient('user', 'pass') + api = client.chatgpt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.chatgpt.scrape("best supplements for better sleep") + + 
self.assertEqual(captured["source"], "chatgpt") + self.assertEqual(captured["prompt"], "best supplements for better sleep") + + def test_scrape_search(self): + client = RealtimeClient('user', 'pass') + api = client.chatgpt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.chatgpt.scrape("best supplements", search=True) + + self.assertEqual(captured["search"], True) + + def test_scrape_geo_location(self): + client = RealtimeClient('user', 'pass') + api = client.chatgpt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.chatgpt.scrape("best supplements", geo_location="United States") + + self.assertEqual(captured["geo_location"], "United States") + + def test_scrape_parse(self): + client = RealtimeClient('user', 'pass') + api = client.chatgpt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.chatgpt.scrape("best supplements", parse=True) + + self.assertEqual(captured["parse"], True) diff --git a/tests/sources/costco/__init__.py b/tests/sources/costco/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/costco/test_costco.py b/tests/sources/costco/test_costco.py new file mode 100644 index 0000000..256e9f1 --- /dev/null +++ b/tests/sources/costco/test_costco.py @@ -0,0 +1,48 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestCostcoSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.costco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.costco.scrape_search("milk") + + self.assertEqual(captured["source"], "costco_search") + + def test_search_domain(self): + client = 
RealtimeClient('user', 'pass') + api = client.costco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.costco.scrape_search("milk", domain="ca") + + self.assertEqual(captured["domain"], "ca") + + +class TestCostcoProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.costco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.costco.scrape_product("123456") + + self.assertEqual(captured["source"], "costco_product") + + +class TestCostcoUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.costco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.costco.scrape_url("https://www.costco.com/") + + self.assertEqual(captured["source"], "costco") diff --git a/tests/sources/dcard/__init__.py b/tests/sources/dcard/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/dcard/test_dcard.py b/tests/sources/dcard/test_dcard.py new file mode 100644 index 0000000..6d52f6a --- /dev/null +++ b/tests/sources/dcard/test_dcard.py @@ -0,0 +1,27 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestDcardSearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.dcard._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.dcard.scrape_search("tv") + + self.assertEqual(captured["source"], "dcard_search") + self.assertEqual(captured["query"], "tv") + + def test_search_render(self): + client = RealtimeClient('user', 
'pass') + api = client.dcard._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.dcard.scrape_search("tv", render="html") + + self.assertEqual(captured["render"], "html") diff --git a/tests/sources/ebay/__init__.py b/tests/sources/ebay/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/ebay/test_ebay.py b/tests/sources/ebay/test_ebay.py new file mode 100644 index 0000000..7510a82 --- /dev/null +++ b/tests/sources/ebay/test_ebay.py @@ -0,0 +1,87 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestEbaySearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.ebay._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.ebay.scrape_search("laptop") + + self.assertEqual(captured["source"], "ebay_search") + self.assertEqual(captured["query"], "laptop") + + def test_search_domain(self): + client = RealtimeClient('user', 'pass') + api = client.ebay._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.ebay.scrape_search("laptop", domain="co.uk") + + self.assertEqual(captured["domain"], "co.uk") + + def test_search_start_page(self): + client = RealtimeClient('user', 'pass') + api = client.ebay._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.ebay.scrape_search("laptop", start_page=3) + + self.assertEqual(captured["start_page"], 3) + + +class TestEbayProductSync(unittest.TestCase): + """Tests that scrape_product parameters flow through to the payload.""" + + def test_product_source(self): + client = 
RealtimeClient('user', 'pass') + api = client.ebay._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.ebay.scrape_product("123456789") + + self.assertEqual(captured["source"], "ebay_product") + self.assertEqual(captured["product_id"], "123456789") + + def test_product_domain(self): + client = RealtimeClient('user', 'pass') + api = client.ebay._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.ebay.scrape_product("123456789", domain="de") + + self.assertEqual(captured["domain"], "de") + + +class TestEbayUrlSync(unittest.TestCase): + """Tests that scrape_url parameters flow through to the payload.""" + + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.ebay._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.ebay.scrape_url("https://www.ebay.com/itm/123456789") + + self.assertEqual(captured["source"], "ebay") + self.assertEqual(captured["url"], "https://www.ebay.com/itm/123456789") + + def test_url_render(self): + client = RealtimeClient('user', 'pass') + api = client.ebay._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.ebay.scrape_url("https://www.ebay.com/itm/123456789", render="html") + + self.assertEqual(captured["render"], "html") diff --git a/tests/sources/etsy/__init__.py b/tests/sources/etsy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/etsy/test_etsy.py b/tests/sources/etsy/test_etsy.py new file mode 100644 index 0000000..9ff5c5b --- /dev/null +++ b/tests/sources/etsy/test_etsy.py @@ -0,0 +1,67 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestEtsySearchSync(unittest.TestCase): + 
"""Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.etsy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.etsy.scrape_search("handmade jewelry") + + self.assertEqual(captured["source"], "etsy_search") + self.assertEqual(captured["query"], "handmade jewelry") + + def test_search_store_id(self): + client = RealtimeClient('user', 'pass') + api = client.etsy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.etsy.scrape_search("jewelry", store_id=12345) + + self.assertEqual(captured["store_id"], 12345) + + +class TestEtsyProductSync(unittest.TestCase): + """Tests that scrape_product parameters flow through to the payload.""" + + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.etsy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.etsy.scrape_product("1234567890") + + self.assertEqual(captured["source"], "etsy_product") + self.assertEqual(captured["product_id"], "1234567890") + + def test_product_parse(self): + client = RealtimeClient('user', 'pass') + api = client.etsy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.etsy.scrape_product("1234567890", parse=True) + + self.assertEqual(captured["parse"], True) + + +class TestEtsyUrlSync(unittest.TestCase): + """Tests that scrape_url parameters flow through to the payload.""" + + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.etsy._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + 
client.etsy.scrape_url("https://www.etsy.com/listing/123456") + + self.assertEqual(captured["source"], "etsy") + self.assertEqual(captured["url"], "https://www.etsy.com/listing/123456") diff --git a/tests/sources/falabella/__init__.py b/tests/sources/falabella/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/falabella/test_falabella.py b/tests/sources/falabella/test_falabella.py new file mode 100644 index 0000000..8787a5c --- /dev/null +++ b/tests/sources/falabella/test_falabella.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestFalabellaSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.falabella._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.falabella.scrape_search("zapatos") + + self.assertEqual(captured["source"], "falabella_search") + + +class TestFalabellaProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.falabella._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.falabella.scrape_product("123456") + + self.assertEqual(captured["source"], "falabella_product") + + +class TestFalabellaUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.falabella._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.falabella.scrape_url("https://www.falabella.com/") + + self.assertEqual(captured["source"], "falabella") diff --git a/tests/sources/flipkart/__init__.py b/tests/sources/flipkart/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/flipkart/test_flipkart.py 
b/tests/sources/flipkart/test_flipkart.py new file mode 100644 index 0000000..109dd56 --- /dev/null +++ b/tests/sources/flipkart/test_flipkart.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestFlipkartSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.flipkart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.flipkart.scrape_search("phone") + + self.assertEqual(captured["source"], "flipkart_search") + + +class TestFlipkartProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.flipkart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.flipkart.scrape_product("123456") + + self.assertEqual(captured["source"], "flipkart_product") + + +class TestFlipkartUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.flipkart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.flipkart.scrape_url("https://www.flipkart.com/") + + self.assertEqual(captured["source"], "flipkart") diff --git a/tests/sources/google/__init__.py b/tests/sources/google/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/google/test_google.py b/tests/sources/google/test_google.py new file mode 100644 index 0000000..9938564 --- /dev/null +++ b/tests/sources/google/test_google.py @@ -0,0 +1,106 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestGoogleAiModeSync(unittest.TestCase): + """Tests that scrape_ai_mode parameters flow through to the payload.""" + + def test_ai_mode_default_render(self): + client = RealtimeClient('user', 
'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google.scrape_ai_mode("what is python") + + self.assertEqual(captured["source"], "google_ai_mode") + self.assertEqual(captured["query"], "what is python") + self.assertEqual(captured["render"], "html") + + def test_ai_mode_parse(self): + client = RealtimeClient('user', 'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google.scrape_ai_mode("what is python", parse=True) + + self.assertEqual(captured["parse"], True) + + def test_ai_mode_geo_location(self): + client = RealtimeClient('user', 'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google.scrape_ai_mode("what is python", geo_location="United States") + + self.assertEqual(captured["geo_location"], "United States") + + +class TestGoogleNewsSync(unittest.TestCase): + """Tests that scrape_news parameters flow through to the payload.""" + + def test_news_injects_tbm(self): + client = RealtimeClient('user', 'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google.scrape_news("breaking news") + + self.assertEqual(captured["source"], "google_search") + self.assertEqual(captured["query"], "breaking news") + tbm_found = False + for item in captured["context"]: + if item.get("key") == "tbm": + self.assertEqual(item["value"], "nws") + tbm_found = True + self.assertTrue(tbm_found, "tbm=nws not found in context") + + def test_news_preserves_existing_context(self): + client = RealtimeClient('user', 'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = 
lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "safe_search", "value": "true"}] + client.google.scrape_news("breaking news", context=ctx) + + keys = [item["key"] for item in captured["context"]] + self.assertIn("safe_search", keys) + self.assertIn("tbm", keys) + + def test_news_does_not_override_tbm(self): + client = RealtimeClient('user', 'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "tbm", "value": "nws"}] + client.google.scrape_news("breaking news", context=ctx) + + tbm_count = sum(1 for item in captured["context"] if item.get("key") == "tbm") + self.assertEqual(tbm_count, 1) + + def test_news_domain(self): + client = RealtimeClient('user', 'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google.scrape_news("breaking news", domain="co.uk") + + self.assertEqual(captured["domain"], "co.uk") + + def test_news_pagination(self): + client = RealtimeClient('user', 'pass') + api = client.google._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google.scrape_news("breaking news", start_page=2, pages=3, limit=5) + + self.assertEqual(captured["start_page"], 2) + self.assertEqual(captured["pages"], 3) + self.assertEqual(captured["limit"], 5) diff --git a/tests/sources/google_shopping/__init__.py b/tests/sources/google_shopping/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/google_shopping/test_google_shopping.py b/tests/sources/google_shopping/test_google_shopping.py new file mode 100644 index 0000000..96c987f --- /dev/null +++ b/tests/sources/google_shopping/test_google_shopping.py @@ -0,0 +1,96 @@ +import unittest +from oxylabs.internal 
import RealtimeClient + + +class TestGoogleShoppingSearchSync(unittest.TestCase): + """Tests that scrape_shopping_search parameters flow through to the payload.""" + + def test_shopping_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.google_shopping._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google_shopping.scrape_shopping_search("laptop") + + self.assertEqual(captured["source"], "google_shopping_search") + self.assertEqual(captured["query"], "laptop") + + def test_shopping_search_pagination(self): + client = RealtimeClient('user', 'pass') + api = client.google_shopping._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google_shopping.scrape_shopping_search("laptop", start_page=2, pages=3) + + self.assertEqual(captured["start_page"], 2) + self.assertEqual(captured["pages"], 3) + + def test_shopping_search_locale(self): + client = RealtimeClient('user', 'pass') + api = client.google_shopping._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google_shopping.scrape_shopping_search("laptop", locale="en") + + self.assertEqual(captured["locale"], "en") + + +class TestGoogleShoppingUrlSync(unittest.TestCase): + """Tests that scrape_shopping_url parameters flow through to the payload.""" + + def test_shopping_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.google_shopping._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google_shopping.scrape_shopping_url("https://shopping.google.com/search?q=laptop") + + self.assertEqual(captured["source"], "google_shopping") + self.assertEqual(captured["url"], 
"https://shopping.google.com/search?q=laptop") + + def test_shopping_url_render(self): + client = RealtimeClient('user', 'pass') + api = client.google_shopping._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google_shopping.scrape_shopping_url("https://shopping.google.com/search?q=laptop", render="html") + + self.assertEqual(captured["render"], "html") + + +class TestGoogleShoppingProductsSync(unittest.TestCase): + """Tests that scrape_shopping_products parameters flow through to the payload.""" + + def test_shopping_products_source(self): + client = RealtimeClient('user', 'pass') + api = client.google_shopping._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google_shopping.scrape_shopping_products("product_token_123") + + self.assertEqual(captured["source"], "google_shopping_product") + self.assertEqual(captured["query"], "product_token_123") + + def test_shopping_products_locale(self): + client = RealtimeClient('user', 'pass') + api = client.google_shopping._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.google_shopping.scrape_shopping_products("product_token_123", locale="de") + + self.assertEqual(captured["locale"], "de") + + +class TestGoogleShoppingPricingRemoved(unittest.TestCase): + """Tests that scrape_product_pricing has been removed (deprecated Oct 2025).""" + + def test_product_pricing_removed(self): + client = RealtimeClient('user', 'pass') + self.assertFalse(hasattr(client.google_shopping, 'scrape_product_pricing')) diff --git a/tests/sources/grainger/__init__.py b/tests/sources/grainger/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/grainger/test_grainger.py b/tests/sources/grainger/test_grainger.py new file mode 100644 index 
0000000..a55e5b4 --- /dev/null +++ b/tests/sources/grainger/test_grainger.py @@ -0,0 +1,48 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestGraingerSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.grainger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.grainger.scrape_search("screws") + + self.assertEqual(captured["source"], "grainger_search") + + def test_search_domain(self): + client = RealtimeClient('user', 'pass') + api = client.grainger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.grainger.scrape_search("screws", domain="com.mx") + + self.assertEqual(captured["domain"], "com.mx") + + +class TestGraingerProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.grainger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.grainger.scrape_product("123456") + + self.assertEqual(captured["source"], "grainger_product") + + +class TestGraingerUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.grainger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.grainger.scrape_url("https://www.grainger.com/") + + self.assertEqual(captured["source"], "grainger") diff --git a/tests/sources/idealo/__init__.py b/tests/sources/idealo/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/idealo/test_idealo.py b/tests/sources/idealo/test_idealo.py new file mode 100644 index 0000000..8dfd8af --- /dev/null +++ b/tests/sources/idealo/test_idealo.py @@ -0,0 
+1,15 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestIdealoSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.idealo._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.idealo.scrape_search("iphone") + + self.assertEqual(captured["source"], "idealo_search") + self.assertEqual(captured["query"], "iphone") diff --git a/tests/sources/indiamart/__init__.py b/tests/sources/indiamart/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/indiamart/test_indiamart.py b/tests/sources/indiamart/test_indiamart.py new file mode 100644 index 0000000..c4f710b --- /dev/null +++ b/tests/sources/indiamart/test_indiamart.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestIndiamartSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.indiamart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.indiamart.scrape_search("machinery") + + self.assertEqual(captured["source"], "indiamart_search") + + +class TestIndiamartProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.indiamart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.indiamart.scrape_product("123456") + + self.assertEqual(captured["source"], "indiamart_product") + + +class TestIndiamartUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.indiamart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + 
client.indiamart.scrape_url("https://www.indiamart.com/") + + self.assertEqual(captured["source"], "indiamart") diff --git a/tests/sources/instacart/__init__.py b/tests/sources/instacart/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/instacart/test_instacart.py b/tests/sources/instacart/test_instacart.py new file mode 100644 index 0000000..fe7ab21 --- /dev/null +++ b/tests/sources/instacart/test_instacart.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestInstacartSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.instacart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.instacart.scrape_search("milk") + + self.assertEqual(captured["source"], "instacart_search") + + +class TestInstacartProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.instacart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.instacart.scrape_product("123") + + self.assertEqual(captured["source"], "instacart_product") + + +class TestInstacartUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.instacart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.instacart.scrape_url("https://www.instacart.com/") + + self.assertEqual(captured["source"], "instacart") diff --git a/tests/sources/kroger/__init__.py b/tests/sources/kroger/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/kroger/test_kroger.py b/tests/sources/kroger/test_kroger.py new file mode 100644 index 0000000..9cfcbb2 --- /dev/null +++ 
b/tests/sources/kroger/test_kroger.py @@ -0,0 +1,68 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestKrogerSearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.kroger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.kroger.scrape_search("milk") + + self.assertEqual(captured["source"], "kroger_search") + self.assertEqual(captured["query"], "milk") + + def test_search_store_id(self): + client = RealtimeClient('user', 'pass') + api = client.kroger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.kroger.scrape_search("milk", store_id=70100456) + + self.assertEqual(captured["store_id"], 70100456) + + def test_search_context(self): + client = RealtimeClient('user', 'pass') + api = client.kroger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "price_range", "value": "1.00-5.00"}] + client.kroger.scrape_search("milk", context=ctx) + + self.assertEqual(captured["context"], ctx) + + +class TestKrogerProductSync(unittest.TestCase): + """Tests that scrape_product parameters flow through to the payload.""" + + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.kroger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.kroger.scrape_product("0001111041700") + + self.assertEqual(captured["source"], "kroger_product") + self.assertEqual(captured["product_id"], "0001111041700") + + +class TestKrogerUrlSync(unittest.TestCase): + """Tests that scrape_url parameters flow through to the payload.""" 
+ + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.kroger._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.kroger.scrape_url("https://www.kroger.com/p/milk/0001111041700") + + self.assertEqual(captured["source"], "kroger") + self.assertEqual(captured["url"], "https://www.kroger.com/p/milk/0001111041700") diff --git a/tests/sources/lazada/__init__.py b/tests/sources/lazada/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/lazada/test_lazada.py b/tests/sources/lazada/test_lazada.py new file mode 100644 index 0000000..a54c12a --- /dev/null +++ b/tests/sources/lazada/test_lazada.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestLazadaSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.lazada._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.lazada.scrape_search("dress") + + self.assertEqual(captured["source"], "lazada_search") + + +class TestLazadaProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.lazada._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.lazada.scrape_product("123456") + + self.assertEqual(captured["source"], "lazada_product") + + +class TestLazadaUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.lazada._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.lazada.scrape_url("https://www.lazada.com/") + + self.assertEqual(captured["source"], "lazada") diff --git 
a/tests/sources/lowes/__init__.py b/tests/sources/lowes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/lowes/test_lowes.py b/tests/sources/lowes/test_lowes.py new file mode 100644 index 0000000..289acac --- /dev/null +++ b/tests/sources/lowes/test_lowes.py @@ -0,0 +1,50 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestLowesSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.lowes._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.lowes.scrape_search("hammer") + + self.assertEqual(captured["source"], "lowes_search") + + def test_search_filters(self): + client = RealtimeClient('user', 'pass') + api = client.lowes._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.lowes.scrape_search("hammer", store_id=123, free_delivery=True, pickup_today=True) + + self.assertEqual(captured["store_id"], 123) + self.assertEqual(captured["free_delivery"], True) + self.assertEqual(captured["pickup_today"], True) + + +class TestLowesProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.lowes._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.lowes.scrape_product("123") + + self.assertEqual(captured["source"], "lowes_product") + + +class TestLowesUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.lowes._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.lowes.scrape_url("https://www.lowes.com/") + + self.assertEqual(captured["source"], "lowes") diff --git 
a/tests/sources/magazineluiza/__init__.py b/tests/sources/magazineluiza/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/magazineluiza/test_magazineluiza.py b/tests/sources/magazineluiza/test_magazineluiza.py new file mode 100644 index 0000000..ba26044 --- /dev/null +++ b/tests/sources/magazineluiza/test_magazineluiza.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestMagazineluizaSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.magazineluiza._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.magazineluiza.scrape_search("telefone") + + self.assertEqual(captured["source"], "magazineluiza_search") + + +class TestMagazineluizaProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.magazineluiza._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.magazineluiza.scrape_product("123456") + + self.assertEqual(captured["source"], "magazineluiza_product") + + +class TestMagazineluizaUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.magazineluiza._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.magazineluiza.scrape_url("https://www.magazineluiza.com.br/") + + self.assertEqual(captured["source"], "magazineluiza") diff --git a/tests/sources/mediamarkt/__init__.py b/tests/sources/mediamarkt/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/mediamarkt/test_mediamarkt.py b/tests/sources/mediamarkt/test_mediamarkt.py new file mode 100644 index 0000000..9f5b575 --- /dev/null +++ 
b/tests/sources/mediamarkt/test_mediamarkt.py @@ -0,0 +1,48 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestMediamarktSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.mediamarkt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mediamarkt.scrape_search("headphones") + + self.assertEqual(captured["source"], "mediamarkt_search") + + def test_search_domain(self): + client = RealtimeClient('user', 'pass') + api = client.mediamarkt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mediamarkt.scrape_search("headphones", domain="es") + + self.assertEqual(captured["domain"], "es") + + +class TestMediamarktProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.mediamarkt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mediamarkt.scrape_product("123456") + + self.assertEqual(captured["source"], "mediamarkt_product") + + +class TestMediamarktUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.mediamarkt._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mediamarkt.scrape_url("https://www.mediamarkt.de/") + + self.assertEqual(captured["source"], "mediamarkt") diff --git a/tests/sources/menards/__init__.py b/tests/sources/menards/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/menards/test_menards.py b/tests/sources/menards/test_menards.py new file mode 100644 index 0000000..3c01a17 --- /dev/null +++ b/tests/sources/menards/test_menards.py @@ 
-0,0 +1,50 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestMenardsSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.menards._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.menards.scrape_search("lumber") + + self.assertEqual(captured["source"], "menards_search") + + def test_search_filters(self): + client = RealtimeClient('user', 'pass') + api = client.menards._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.menards.scrape_search("lumber", in_stock_today=True, pickup_at_store_eligible=True) + + self.assertEqual(captured["in_stock_today"], True) + self.assertEqual(captured["pickup_at_store_eligible"], True) + + +class TestMenardsProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.menards._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.menards.scrape_product("123", store_id="456") + + self.assertEqual(captured["source"], "menards_product") + self.assertEqual(captured["store_id"], "456") + + +class TestMenardsUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.menards._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.menards.scrape_url("https://www.menards.com/") + + self.assertEqual(captured["source"], "menards") diff --git a/tests/sources/mercadolibre/__init__.py b/tests/sources/mercadolibre/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/mercadolibre/test_mercadolibre.py b/tests/sources/mercadolibre/test_mercadolibre.py new 
file mode 100644 index 0000000..4c5bbfa --- /dev/null +++ b/tests/sources/mercadolibre/test_mercadolibre.py @@ -0,0 +1,38 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestMercadolibreSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.mercadolibre._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mercadolibre.scrape_search("iphone") + + self.assertEqual(captured["source"], "mercadolibre_search") + + +class TestMercadolibreProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.mercadolibre._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mercadolibre.scrape_product("MLA123456") + + self.assertEqual(captured["source"], "mercadolibre_product") + + +class TestMercadolibreUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.mercadolibre._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mercadolibre.scrape_url("https://www.mercadolibre.com/") + + self.assertEqual(captured["source"], "mercadolibre") diff --git a/tests/sources/mercadolivre/__init__.py b/tests/sources/mercadolivre/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/mercadolivre/test_mercadolivre.py b/tests/sources/mercadolivre/test_mercadolivre.py new file mode 100644 index 0000000..a1bf18c --- /dev/null +++ b/tests/sources/mercadolivre/test_mercadolivre.py @@ -0,0 +1,26 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestMercadolivreSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = 
client.mercadolivre._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mercadolivre.scrape_search("celular") + + self.assertEqual(captured["source"], "mercadolivre_search") + + +class TestMercadolivreProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.mercadolivre._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.mercadolivre.scrape_product("MLB123456") + + self.assertEqual(captured["source"], "mercadolivre_product") diff --git a/tests/sources/perplexity/__init__.py b/tests/sources/perplexity/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/perplexity/test_perplexity.py b/tests/sources/perplexity/test_perplexity.py new file mode 100644 index 0000000..f41cd75 --- /dev/null +++ b/tests/sources/perplexity/test_perplexity.py @@ -0,0 +1,37 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestPerplexitySync(unittest.TestCase): + """Tests that scrape parameters flow through to the payload.""" + + def test_scrape_source(self): + client = RealtimeClient('user', 'pass') + api = client.perplexity._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.perplexity.scrape("top 3 smartphones in 2025") + + self.assertEqual(captured["source"], "perplexity") + self.assertEqual(captured["prompt"], "top 3 smartphones in 2025") + + def test_scrape_geo_location(self): + client = RealtimeClient('user', 'pass') + api = client.perplexity._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.perplexity.scrape("smartphones", geo_location="United States") + + self.assertEqual(captured["geo_location"], 
"United States") + + def test_scrape_parse(self): + client = RealtimeClient('user', 'pass') + api = client.perplexity._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.perplexity.scrape("smartphones", parse=True) + + self.assertEqual(captured["parse"], True) diff --git a/tests/sources/petco/__init__.py b/tests/sources/petco/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/petco/test_petco.py b/tests/sources/petco/test_petco.py new file mode 100644 index 0000000..114f371 --- /dev/null +++ b/tests/sources/petco/test_petco.py @@ -0,0 +1,47 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestPetcoSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.petco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.petco.scrape_search("dog food") + + self.assertEqual(captured["source"], "petco_search") + + def test_search_store_fulfillment(self): + client = RealtimeClient('user', 'pass') + api = client.petco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.petco.scrape_search("dog food", store_id="123", fulfillment_type="free_pickup_today") + + self.assertEqual(captured["store_id"], "123") + self.assertEqual(captured["fulfillment_type"], "free_pickup_today") + + +class TestPetcoUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.petco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.petco.scrape_url("https://www.petco.com/") + + self.assertEqual(captured["source"], "petco") + + def test_url_store(self): + client = 
RealtimeClient('user', 'pass') + api = client.petco._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.petco.scrape_url("https://www.petco.com/", store_id=456) + + self.assertEqual(captured["store_id"], 456) diff --git a/tests/sources/publix/__init__.py b/tests/sources/publix/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/publix/test_publix.py b/tests/sources/publix/test_publix.py new file mode 100644 index 0000000..ef62450 --- /dev/null +++ b/tests/sources/publix/test_publix.py @@ -0,0 +1,48 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestPublixSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.publix._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.publix.scrape_search("bread") + + self.assertEqual(captured["source"], "publix_search") + + def test_search_store(self): + client = RealtimeClient('user', 'pass') + api = client.publix._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.publix.scrape_search("bread", store_id=123) + + self.assertEqual(captured["store_id"], 123) + + +class TestPublixProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.publix._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.publix.scrape_product("12345") + + self.assertEqual(captured["source"], "publix_product") + + +class TestPublixUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.publix._api_instance + captured = {} + api._get_http_response = lambda 
payload, method, config: (captured.update(payload) or {"mock": True}) + + client.publix.scrape_url("https://www.publix.com/") + + self.assertEqual(captured["source"], "publix") diff --git a/tests/sources/rakuten/__init__.py b/tests/sources/rakuten/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/rakuten/test_rakuten.py b/tests/sources/rakuten/test_rakuten.py new file mode 100644 index 0000000..d8eabbd --- /dev/null +++ b/tests/sources/rakuten/test_rakuten.py @@ -0,0 +1,26 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestRakutenSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.rakuten._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.rakuten.scrape_search("shoes") + + self.assertEqual(captured["source"], "rakuten_search") + + +class TestRakutenUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.rakuten._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.rakuten.scrape_url("https://www.rakuten.com.tw/") + + self.assertEqual(captured["source"], "rakuten") diff --git a/tests/sources/shein/__init__.py b/tests/sources/shein/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/shein/test_shein.py b/tests/sources/shein/test_shein.py new file mode 100644 index 0000000..0883b12 --- /dev/null +++ b/tests/sources/shein/test_shein.py @@ -0,0 +1,27 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestSheinSearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.shein._api_instance + captured = {} + 
api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.shein.scrape_search("dress") + + self.assertEqual(captured["source"], "shein_search") + self.assertEqual(captured["query"], "dress") + + def test_search_domain(self): + client = RealtimeClient('user', 'pass') + api = client.shein._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.shein.scrape_search("dress", domain="com.mx") + + self.assertEqual(captured["domain"], "com.mx") diff --git a/tests/sources/staples/__init__.py b/tests/sources/staples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/staples/test_staples.py b/tests/sources/staples/test_staples.py new file mode 100644 index 0000000..f80413f --- /dev/null +++ b/tests/sources/staples/test_staples.py @@ -0,0 +1,37 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestStaplesSearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.staples._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.staples.scrape_search("chair") + + self.assertEqual(captured["source"], "staples_search") + self.assertEqual(captured["query"], "chair") + + def test_search_domain(self): + client = RealtimeClient('user', 'pass') + api = client.staples._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.staples.scrape_search("chair", domain="ca") + + self.assertEqual(captured["domain"], "ca") + + def test_search_start_page(self): + client = RealtimeClient('user', 'pass') + api = client.staples._api_instance + captured = {} + api._get_http_response = lambda 
payload, method, config: (captured.update(payload) or {"mock": True}) + + client.staples.scrape_search("chair", start_page=2) + + self.assertEqual(captured["start_page"], 2) diff --git a/tests/sources/target_store/__init__.py b/tests/sources/target_store/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/target_store/test_target_store.py b/tests/sources/target_store/test_target_store.py new file mode 100644 index 0000000..d1fd49f --- /dev/null +++ b/tests/sources/target_store/test_target_store.py @@ -0,0 +1,63 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestTargetStoreSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.target_store._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.target_store.scrape_search("towels") + + self.assertEqual(captured["source"], "target_search") + + def test_search_fulfillment(self): + client = RealtimeClient('user', 'pass') + api = client.target_store._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.target_store.scrape_search("towels", store_id=123, fulfillment_type="pickup") + + self.assertEqual(captured["store_id"], 123) + self.assertEqual(captured["fulfillment_type"], "pickup") + + +class TestTargetStoreProductSync(unittest.TestCase): + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.target_store._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.target_store.scrape_product("A-12345678") + + self.assertEqual(captured["source"], "target_product") + self.assertEqual(captured["product_id"], "A-12345678") + + +class TestTargetStoreCategorySync(unittest.TestCase): + def 
test_category_source(self): + client = RealtimeClient('user', 'pass') + api = client.target_store._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.target_store.scrape_category("owq2q") + + self.assertEqual(captured["source"], "target_category") + self.assertEqual(captured["category_id"], "owq2q") + + +class TestTargetStoreUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.target_store._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.target_store.scrape_url("https://www.target.com/") + + self.assertEqual(captured["source"], "target") diff --git a/tests/sources/tiktok/__init__.py b/tests/sources/tiktok/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/tiktok/test_tiktok.py b/tests/sources/tiktok/test_tiktok.py new file mode 100644 index 0000000..6739401 --- /dev/null +++ b/tests/sources/tiktok/test_tiktok.py @@ -0,0 +1,77 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestTiktokShopSearchSync(unittest.TestCase): + """Tests that scrape_shop_search parameters flow through to the payload.""" + + def test_shop_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.tiktok._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.tiktok.scrape_shop_search("phone case") + + self.assertEqual(captured["source"], "tiktok_shop_search") + self.assertEqual(captured["query"], "phone case") + + def test_shop_search_render(self): + client = RealtimeClient('user', 'pass') + api = client.tiktok._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + 
client.tiktok.scrape_shop_search("phone case", render="html") + + self.assertEqual(captured["render"], "html") + + +class TestTiktokShopProductSync(unittest.TestCase): + """Tests that scrape_shop_product parameters flow through to the payload.""" + + def test_shop_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.tiktok._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.tiktok.scrape_shop_product("1729553810632530") + + self.assertEqual(captured["source"], "tiktok_shop_product") + self.assertEqual(captured["product_id"], "1729553810632530") + + def test_shop_product_domain(self): + client = RealtimeClient('user', 'pass') + api = client.tiktok._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.tiktok.scrape_shop_product("1729553810632530", domain="com") + + self.assertEqual(captured["domain"], "com") + + +class TestTiktokShopUrlSync(unittest.TestCase): + """Tests that scrape_shop_url parameters flow through to the payload.""" + + def test_shop_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.tiktok._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.tiktok.scrape_shop_url("https://www.tiktok.com/@shop/product/123") + + self.assertEqual(captured["source"], "tiktok") + self.assertEqual(captured["url"], "https://www.tiktok.com/@shop/product/123") + + def test_shop_url_user_agent(self): + client = RealtimeClient('user', 'pass') + api = client.tiktok._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.tiktok.scrape_shop_url("https://www.tiktok.com/@shop/product/123", user_agent_type="mobile") + + self.assertEqual(captured["user_agent_type"], 
"mobile") diff --git a/tests/sources/tokopedia/__init__.py b/tests/sources/tokopedia/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/tokopedia/test_tokopedia.py b/tests/sources/tokopedia/test_tokopedia.py new file mode 100644 index 0000000..0fd5f99 --- /dev/null +++ b/tests/sources/tokopedia/test_tokopedia.py @@ -0,0 +1,26 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestTokopediaSearchSync(unittest.TestCase): + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.tokopedia._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.tokopedia.scrape_search("shampoo") + + self.assertEqual(captured["source"], "tokopedia_search") + + +class TestTokopediaUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.tokopedia._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.tokopedia.scrape_url("https://www.tokopedia.com/") + + self.assertEqual(captured["source"], "tokopedia") diff --git a/tests/sources/universal/__init__.py b/tests/sources/universal/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/universal/test_universal.py b/tests/sources/universal/test_universal.py new file mode 100644 index 0000000..6476755 --- /dev/null +++ b/tests/sources/universal/test_universal.py @@ -0,0 +1,57 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestUniversalUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.universal._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.universal.scrape_url("https://www.example.com") + + 
self.assertEqual(captured["source"], "universal_ecommerce") + self.assertEqual(captured["url"], "https://www.example.com") + + def test_url_render(self): + client = RealtimeClient('user', 'pass') + api = client.universal._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.universal.scrape_url("https://www.example.com", render="html") + + self.assertEqual(captured["render"], "html") + + def test_url_parse(self): + client = RealtimeClient('user', 'pass') + api = client.universal._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.universal.scrape_url("https://www.example.com", parse=True) + + self.assertEqual(captured["parse"], True) + + def test_url_browser_instructions(self): + client = RealtimeClient('user', 'pass') + api = client.universal._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + instructions = [{"type": "click", "selector": ".btn"}] + client.universal.scrape_url("https://www.example.com", render="html", browser_instructions=instructions) + + self.assertEqual(captured["browser_instructions"], instructions) + + def test_url_context(self): + client = RealtimeClient('user', 'pass') + api = client.universal._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "http_method", "value": "post"}] + client.universal.scrape_url("https://www.example.com", context=ctx) + + self.assertEqual(captured["context"], ctx) \ No newline at end of file diff --git a/tests/sources/walmart/__init__.py b/tests/sources/walmart/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/walmart/test_walmart.py b/tests/sources/walmart/test_walmart.py new file mode 100644 index 0000000..54f92fb 
--- /dev/null +++ b/tests/sources/walmart/test_walmart.py @@ -0,0 +1,90 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestWalmartSearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.walmart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.walmart.scrape_search("laptop") + + self.assertEqual(captured["source"], "walmart_search") + self.assertEqual(captured["query"], "laptop") + + def test_search_filters(self): + client = RealtimeClient('user', 'pass') + api = client.walmart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.walmart.scrape_search("laptop", min_price=100.0, max_price=500.0, sort_by="price_low") + + self.assertEqual(captured["min_price"], 100.0) + self.assertEqual(captured["max_price"], 500.0) + self.assertEqual(captured["sort_by"], "price_low") + + def test_search_fulfillment(self): + client = RealtimeClient('user', 'pass') + api = client.walmart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.walmart.scrape_search("laptop", delivery_zip="10001", fulfillment_speed="2_days") + + self.assertEqual(captured["delivery_zip"], "10001") + self.assertEqual(captured["fulfillment_speed"], "2_days") + + +class TestWalmartProductSync(unittest.TestCase): + """Tests that scrape_product parameters flow through to the payload.""" + + def test_product_source(self): + client = RealtimeClient('user', 'pass') + api = client.walmart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.walmart.scrape_product("123456789") + + 
self.assertEqual(captured["source"], "walmart_product") + self.assertEqual(captured["product_id"], "123456789") + + def test_product_store(self): + client = RealtimeClient('user', 'pass') + api = client.walmart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.walmart.scrape_product("123456789", store_id="1234") + + self.assertEqual(captured["store_id"], "1234") + + +class TestWalmartUrlSync(unittest.TestCase): + """Tests that scrape_url parameters flow through to the payload.""" + + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.walmart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.walmart.scrape_url("https://www.walmart.com/ip/123456789") + + self.assertEqual(captured["source"], "walmart") + self.assertEqual(captured["url"], "https://www.walmart.com/ip/123456789") + + def test_url_parse(self): + client = RealtimeClient('user', 'pass') + api = client.walmart._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.walmart.scrape_url("https://www.walmart.com/ip/123456789", parse=True) + + self.assertEqual(captured["parse"], True) diff --git a/tests/sources/youtube/__init__.py b/tests/sources/youtube/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/youtube/test_youtube.py b/tests/sources/youtube/test_youtube.py new file mode 100644 index 0000000..18eec0c --- /dev/null +++ b/tests/sources/youtube/test_youtube.py @@ -0,0 +1,246 @@ +import unittest +from oxylabs.internal import AsyncClient, RealtimeClient + + +class TestYoutubeTranscriptSync(unittest.TestCase): + """Tests that scrape_transcript parameters flow through to the payload.""" + + def test_transcript_source(self): + client = RealtimeClient('user', 'pass') + api = 
client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_transcript("dQw4w9WgXcQ") + + self.assertEqual(captured["source"], "youtube_transcript") + self.assertEqual(captured["query"], "dQw4w9WgXcQ") + + def test_transcript_context(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "language_code", "value": "en"}] + client.youtube.scrape_transcript("dQw4w9WgXcQ", context=ctx) + + self.assertEqual(captured["context"], ctx) + + +class TestYoutubeSearchSync(unittest.TestCase): + """Tests that scrape_search parameters flow through to the payload.""" + + def test_search_source(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_search("python tutorial") + + self.assertEqual(captured["source"], "youtube_search") + self.assertEqual(captured["query"], "python tutorial") + + def test_search_filters(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_search( + "python tutorial", + upload_date="this_week", + type="video", + duration="4-20", + sort_by="view_count", + ) + + self.assertEqual(captured["upload_date"], "this_week") + self.assertEqual(captured["type"], "video") + self.assertEqual(captured["duration"], "4-20") + self.assertEqual(captured["sort_by"], "view_count") + + def test_search_boolean_filters(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = 
lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_search("python", hd=True, filter_4k=True, live=True) + + self.assertEqual(captured["hd"], True) + self.assertEqual(captured["4k"], True) + self.assertEqual(captured["live"], True) + + def test_search_numeric_key_filters(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_search("python", filter_360=True, filter_3d=True) + + self.assertEqual(captured["360"], True) + self.assertEqual(captured["3d"], True) + + +class TestYoutubeSearchMaxSync(unittest.TestCase): + """Tests that scrape_search_max uses the correct source.""" + + def test_search_max_source(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_search_max("python tutorial") + + self.assertEqual(captured["source"], "youtube_search_max") + + +class TestYoutubeMetadataSync(unittest.TestCase): + """Tests that scrape_metadata parameters flow through to the payload.""" + + def test_metadata_source_and_parse(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_metadata("dQw4w9WgXcQ") + + self.assertEqual(captured["source"], "youtube_metadata") + self.assertEqual(captured["parse"], True) + + +class TestYoutubeChannelSync(unittest.TestCase): + """Tests that scrape_channel parameters flow through to the payload.""" + + def test_channel_source(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, 
method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_channel("@Oxylabs") + + self.assertEqual(captured["source"], "youtube_channel") + self.assertEqual(captured["channel_handle"], "@Oxylabs") + + def test_channel_limit(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_channel("@Oxylabs", limit=50) + + self.assertEqual(captured["limit"], 50) + + +class TestYoutubeSubtitlesSync(unittest.TestCase): + """Tests that scrape_subtitles parameters flow through to the payload.""" + + def test_subtitles_source(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + ctx = [{"key": "language_code", "value": "en"}] + client.youtube.scrape_subtitles("dQw4w9WgXcQ", context=ctx) + + self.assertEqual(captured["source"], "youtube_subtitles") + self.assertEqual(captured["context"], ctx) + + +class TestYoutubeVideoTrainabilitySync(unittest.TestCase): + """Tests that scrape_video_trainability parameters flow through to the payload.""" + + def test_trainability_source(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_video_trainability("dQw4w9WgXcQ") + + self.assertEqual(captured["source"], "youtube_video_trainability") + self.assertEqual(captured["video_id"], "dQw4w9WgXcQ") + + +class TestYoutubeAutocompleteSync(unittest.TestCase): + """Tests that scrape_autocomplete parameters flow through to the payload.""" + + def test_autocomplete_source(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + 
api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_autocomplete("python") + + self.assertEqual(captured["source"], "youtube_autocomplete") + self.assertEqual(captured["query"], "python") + + def test_autocomplete_localization(self): + client = RealtimeClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.youtube.scrape_autocomplete("python", location="GB", language="en") + + self.assertEqual(captured["location"], "GB") + self.assertEqual(captured["language"], "en") + + +class TestYoutubeDownloadAsync(unittest.IsolatedAsyncioTestCase): + """Tests that scrape_download is only on async and params flow through.""" + + async def test_download_source_and_storage(self): + client = AsyncClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + async def mock_get_resp(payload, config): + captured.update(payload) + return {"mock": True} + api.get_response = mock_get_resp + + await client.youtube.scrape_download( + "dQw4w9WgXcQ", + storage_type="s3", + storage_url="s3://my-bucket/videos/", + ) + + self.assertEqual(captured["source"], "youtube_download") + self.assertEqual(captured["query"], "dQw4w9WgXcQ") + self.assertEqual(captured["storage_type"], "s3") + self.assertEqual(captured["storage_url"], "s3://my-bucket/videos/") + + async def test_download_context(self): + client = AsyncClient('user', 'pass') + api = client.youtube._api_instance + captured = {} + async def mock_get_resp(payload, config): + captured.update(payload) + return {"mock": True} + api.get_response = mock_get_resp + + ctx = [ + {"key": "download_type", "value": "video"}, + {"key": "video_quality", "value": "1080"}, + ] + await client.youtube.scrape_download( + "dQw4w9WgXcQ", + storage_type="s3", + storage_url="s3://my-bucket/videos/", + context=ctx, + ) + + 
self.assertEqual(captured["context"], ctx) + + def test_download_not_on_sync(self): + client = RealtimeClient('user', 'pass') + self.assertFalse(hasattr(client.youtube, 'scrape_download')) diff --git a/tests/sources/zillow/__init__.py b/tests/sources/zillow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sources/zillow/test_zillow.py b/tests/sources/zillow/test_zillow.py new file mode 100644 index 0000000..e58e3d3 --- /dev/null +++ b/tests/sources/zillow/test_zillow.py @@ -0,0 +1,15 @@ +import unittest +from oxylabs.internal import RealtimeClient + + +class TestZillowUrlSync(unittest.TestCase): + def test_url_source(self): + client = RealtimeClient('user', 'pass') + api = client.zillow._api_instance + captured = {} + api._get_http_response = lambda payload, method, config: (captured.update(payload) or {"mock": True}) + + client.zillow.scrape_url("https://www.zillow.com/homes/for_sale/") + + self.assertEqual(captured["source"], "zillow") + self.assertEqual(captured["url"], "https://www.zillow.com/homes/for_sale/") From 976fc6deaabdb40df58c0d3b7540ac857d149026 Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 6 Mar 2026 13:41:46 +0200 Subject: [PATCH 05/14] add new targets tests to the bash command --- scripts/tests.sh | 165 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 2 deletions(-) diff --git a/scripts/tests.sh b/scripts/tests.sh index 7c56f74..6baea28 100755 --- a/scripts/tests.sh +++ b/scripts/tests.sh @@ -1,15 +1,176 @@ #!/bin/bash # Run source tests +python -m unittest tests.sources.amazon.test_amazon.TestAmazonSearchParams +python -m unittest tests.sources.amazon.test_amazon.TestAmazonUrlParams +python -m unittest tests.sources.amazon.test_amazon.TestAmazonProductParams +python -m unittest tests.sources.amazon.test_amazon.TestAmazonPricingParams +python -m unittest tests.sources.amazon.test_amazon.TestAmazonBestsellersParams +python -m unittest 
tests.sources.amazon.test_amazon.TestAmazonSellersParams + python -m unittest tests.sources.bing.test_bing.TestBingSearchSync -python -m unittest tests.sources.bing.test_bing.TestBingSearchAsync python -m unittest tests.sources.bing.test_bing.TestBingUrlSync +python -m unittest tests.sources.bing.test_bing.TestBingSearchAsync python -m unittest tests.sources.bing.test_bing.TestBingUrlAsync +python -m unittest tests.sources.google.test_google.TestGoogleAiModeSync +python -m unittest tests.sources.google.test_google.TestGoogleNewsSync + +python -m unittest tests.sources.google_shopping.test_google_shopping.TestGoogleShoppingSearchSync +python -m unittest tests.sources.google_shopping.test_google_shopping.TestGoogleShoppingUrlSync +python -m unittest tests.sources.google_shopping.test_google_shopping.TestGoogleShoppingProductsSync +python -m unittest tests.sources.google_shopping.test_google_shopping.TestGoogleShoppingPricingRemoved + +python -m unittest tests.sources.universal.test_universal.TestUniversalUrlSync + +python -m unittest tests.sources.ebay.test_ebay.TestEbaySearchSync +python -m unittest tests.sources.ebay.test_ebay.TestEbayProductSync +python -m unittest tests.sources.ebay.test_ebay.TestEbayUrlSync + +python -m unittest tests.sources.etsy.test_etsy.TestEtsySearchSync +python -m unittest tests.sources.etsy.test_etsy.TestEtsyProductSync +python -m unittest tests.sources.etsy.test_etsy.TestEtsyUrlSync + +python -m unittest tests.sources.tiktok.test_tiktok.TestTiktokShopSearchSync +python -m unittest tests.sources.tiktok.test_tiktok.TestTiktokShopProductSync +python -m unittest tests.sources.tiktok.test_tiktok.TestTiktokShopUrlSync + +python -m unittest tests.sources.chatgpt.test_chatgpt.TestChatgptSync + +python -m unittest tests.sources.perplexity.test_perplexity.TestPerplexitySync + +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeTranscriptSync +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeSearchSync +python -m 
unittest tests.sources.youtube.test_youtube.TestYoutubeSearchMaxSync +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeMetadataSync +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeChannelSync +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeSubtitlesSync +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeVideoTrainabilitySync +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeAutocompleteSync +python -m unittest tests.sources.youtube.test_youtube.TestYoutubeDownloadAsync + python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairSearchSync -python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairSearchAsync python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairUrlSync +python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairSearchAsync python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairUrlAsync +python -m unittest tests.sources.kroger.test_kroger.TestKrogerSearchSync +python -m unittest tests.sources.kroger.test_kroger.TestKrogerProductSync +python -m unittest tests.sources.kroger.test_kroger.TestKrogerUrlSync + +python -m unittest tests.sources.walmart.test_walmart.TestWalmartSearchSync +python -m unittest tests.sources.walmart.test_walmart.TestWalmartProductSync +python -m unittest tests.sources.walmart.test_walmart.TestWalmartUrlSync + +python -m unittest tests.sources.target_store.test_target_store.TestTargetStoreSearchSync +python -m unittest tests.sources.target_store.test_target_store.TestTargetStoreProductSync +python -m unittest tests.sources.target_store.test_target_store.TestTargetStoreCategorySync +python -m unittest tests.sources.target_store.test_target_store.TestTargetStoreUrlSync + +python -m unittest tests.sources.bestbuy.test_bestbuy.TestBestbuySearchSync +python -m unittest tests.sources.bestbuy.test_bestbuy.TestBestbuyProductSync + +python -m unittest tests.sources.costco.test_costco.TestCostcoSearchSync +python -m 
unittest tests.sources.costco.test_costco.TestCostcoProductSync +python -m unittest tests.sources.costco.test_costco.TestCostcoUrlSync + +python -m unittest tests.sources.instacart.test_instacart.TestInstacartSearchSync +python -m unittest tests.sources.instacart.test_instacart.TestInstacartProductSync +python -m unittest tests.sources.instacart.test_instacart.TestInstacartUrlSync + +python -m unittest tests.sources.lowes.test_lowes.TestLowesSearchSync +python -m unittest tests.sources.lowes.test_lowes.TestLowesProductSync +python -m unittest tests.sources.lowes.test_lowes.TestLowesUrlSync + +python -m unittest tests.sources.menards.test_menards.TestMenardsSearchSync +python -m unittest tests.sources.menards.test_menards.TestMenardsProductSync +python -m unittest tests.sources.menards.test_menards.TestMenardsUrlSync + +python -m unittest tests.sources.petco.test_petco.TestPetcoSearchSync +python -m unittest tests.sources.petco.test_petco.TestPetcoUrlSync + +python -m unittest tests.sources.publix.test_publix.TestPublixSearchSync +python -m unittest tests.sources.publix.test_publix.TestPublixProductSync +python -m unittest tests.sources.publix.test_publix.TestPublixUrlSync + +python -m unittest tests.sources.staples.test_staples.TestStaplesSearchSync + +python -m unittest tests.sources.bedbathandbeyond.test_bedbathandbeyond.TestBedbathandbeyondSearchSync +python -m unittest tests.sources.bedbathandbeyond.test_bedbathandbeyond.TestBedbathandbeyondProductSync +python -m unittest tests.sources.bedbathandbeyond.test_bedbathandbeyond.TestBedbathandbeyondUrlSync + +python -m unittest tests.sources.bodegaaurrera.test_bodegaaurrera.TestBodegaaurrerapSearchSync +python -m unittest tests.sources.bodegaaurrera.test_bodegaaurrera.TestBodegaaurreraProductSync +python -m unittest tests.sources.bodegaaurrera.test_bodegaaurrera.TestBodegaaurreraUrlSync + +python -m unittest tests.sources.grainger.test_grainger.TestGraingerSearchSync +python -m unittest 
tests.sources.grainger.test_grainger.TestGraingerProductSync +python -m unittest tests.sources.grainger.test_grainger.TestGraingerUrlSync + +python -m unittest tests.sources.alibaba.test_alibaba.TestAlibabaSearchSync +python -m unittest tests.sources.alibaba.test_alibaba.TestAlibabaProductSync +python -m unittest tests.sources.alibaba.test_alibaba.TestAlibabaUrlSync + +python -m unittest tests.sources.aliexpress.test_aliexpress.TestAliexpressSearchSync +python -m unittest tests.sources.aliexpress.test_aliexpress.TestAliexpressProductSync +python -m unittest tests.sources.aliexpress.test_aliexpress.TestAliexpressUrlSync + +python -m unittest tests.sources.avnet.test_avnet.TestAvnetSearchSync + +python -m unittest tests.sources.flipkart.test_flipkart.TestFlipkartSearchSync +python -m unittest tests.sources.flipkart.test_flipkart.TestFlipkartProductSync +python -m unittest tests.sources.flipkart.test_flipkart.TestFlipkartUrlSync + +python -m unittest tests.sources.indiamart.test_indiamart.TestIndiamartSearchSync +python -m unittest tests.sources.indiamart.test_indiamart.TestIndiamartProductSync +python -m unittest tests.sources.indiamart.test_indiamart.TestIndiamartUrlSync + +python -m unittest tests.sources.lazada.test_lazada.TestLazadaSearchSync +python -m unittest tests.sources.lazada.test_lazada.TestLazadaProductSync +python -m unittest tests.sources.lazada.test_lazada.TestLazadaUrlSync + +python -m unittest tests.sources.rakuten.test_rakuten.TestRakutenSearchSync +python -m unittest tests.sources.rakuten.test_rakuten.TestRakutenUrlSync + +python -m unittest tests.sources.shein.test_shein.TestSheinSearchSync + +python -m unittest tests.sources.tokopedia.test_tokopedia.TestTokopediaSearchSync +python -m unittest tests.sources.tokopedia.test_tokopedia.TestTokopediaUrlSync + +python -m unittest tests.sources.allegro.test_allegro.TestAllegroSearchSync +python -m unittest tests.sources.allegro.test_allegro.TestAllegroProductSync + +python -m unittest 
tests.sources.cdiscount.test_cdiscount.TestCdiscountSearchSync +python -m unittest tests.sources.cdiscount.test_cdiscount.TestCdiscountProductSync +python -m unittest tests.sources.cdiscount.test_cdiscount.TestCdiscountUrlSync + +python -m unittest tests.sources.idealo.test_idealo.TestIdealoSearchSync + +python -m unittest tests.sources.mediamarkt.test_mediamarkt.TestMediamarktSearchSync +python -m unittest tests.sources.mediamarkt.test_mediamarkt.TestMediamarktProductSync +python -m unittest tests.sources.mediamarkt.test_mediamarkt.TestMediamarktUrlSync + +python -m unittest tests.sources.dcard.test_dcard.TestDcardSearchSync + +python -m unittest tests.sources.falabella.test_falabella.TestFalabellaSearchSync +python -m unittest tests.sources.falabella.test_falabella.TestFalabellaProductSync +python -m unittest tests.sources.falabella.test_falabella.TestFalabellaUrlSync + +python -m unittest tests.sources.magazineluiza.test_magazineluiza.TestMagazineluizaSearchSync +python -m unittest tests.sources.magazineluiza.test_magazineluiza.TestMagazineluizaProductSync +python -m unittest tests.sources.magazineluiza.test_magazineluiza.TestMagazineluizaUrlSync + +python -m unittest tests.sources.mercadolibre.test_mercadolibre.TestMercadolibreSearchSync +python -m unittest tests.sources.mercadolibre.test_mercadolibre.TestMercadolibreProductSync +python -m unittest tests.sources.mercadolibre.test_mercadolibre.TestMercadolibreUrlSync + +python -m unittest tests.sources.mercadolivre.test_mercadolivre.TestMercadolivreSearchSync +python -m unittest tests.sources.mercadolivre.test_mercadolivre.TestMercadolivreProductSync + +python -m unittest tests.sources.airbnb.test_airbnb.TestAirbnbProductSync +python -m unittest tests.sources.airbnb.test_airbnb.TestAirbnbUrlSync + +python -m unittest tests.sources.zillow.test_zillow.TestZillowUrlSync + # Run proxy tests python -m unittest tests.proxy.test_proxy.TestProxyGet From 56bf251dcbeb5d5ef6eb21e6cd95c477740cd559 Mon Sep 17 00:00:00 2001 
From: karolispetkevicius-de Date: Fri, 6 Mar 2026 13:49:02 +0200 Subject: [PATCH 06/14] deleting github actions directory for now --- .github/workflows/event-listener.yml | 34 ---------------------------- 1 file changed, 34 deletions(-) delete mode 100644 .github/workflows/event-listener.yml diff --git a/.github/workflows/event-listener.yml b/.github/workflows/event-listener.yml deleted file mode 100644 index 1134be3..0000000 --- a/.github/workflows/event-listener.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: GitBook Event Receiver - -on: - repository_dispatch: - types: [gitbook_update] - -jobs: - process-gitbook-update: - runs-on: ubuntu-latest - steps: - - name: Acknowledge Event - run: | - echo "🚀 Event Received from GitBook!" - echo "Commit: ${{ github.event.client_payload.sha }}" - echo "Author: ${{ github.event.client_payload.author }}" - echo "Summary: ${{ github.event.client_payload.message }}" - - - name: Show Changed Paths - run: | - echo "The following filtered paths were modified:" - echo "${{ github.event.client_payload.changed_paths }}" - - - name: View Detailed Diff - run: | - echo "--- START OF DIFF ---" - echo "${{ github.event.client_payload.diff }}" - echo "--- END OF DIFF ---" - - # EXAMPLE: Action based on specific path - - name: Conditional Logic - if: contains(github.event.client_payload.changed_paths, 'docs/api-reference') - run: | - echo "Detected change in API Reference. Triggering internal sync..." 
- # Insert your custom command here (e.g., npm run build-docs) \ No newline at end of file From 093d8a95c2717391b6b2abefddb45cc7043a6b49 Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 6 Mar 2026 13:54:47 +0200 Subject: [PATCH 07/14] bump versions --- requirements.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/requirements.txt b/requirements.txt index e64227b..81ff14a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ -aiohttp==3.9.4 -aiosignal==1.3.1 -async-timeout==4.0.3 +aiohttp==3.13.3 +aiosignal==1.4.0 +async-timeout==5.0.1 asyncio==3.4.3; python_version < '3.7' -attrs==23.2.0 -certifi==2024.7.4 -charset-normalizer==3.3.2 -frozenlist==1.4.1 -idna==3.7 -multidict==6.0.5 -requests==2.32.2 -urllib3==2.2.2 -yarl==1.9.4 +attrs==25.4.0 +certifi==2026.2.25 +charset-normalizer==3.4.5 +frozenlist==1.8.0 +idna==3.11 +multidict==6.7.1 +requests==2.32.5 +urllib3==2.6.3 +yarl==1.23.0 From 0bf032443e254b1648520a1575e6383d0506b83d Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 6 Mar 2026 14:01:55 +0200 Subject: [PATCH 08/14] github actions back --- .github/workflows/event-listener.yml | 34 +++++++++++++++++++ .../oxylabs-sdk-python-karolis.code-workspace | 13 +++++++ 2 files changed, 47 insertions(+) create mode 100644 .github/workflows/event-listener.yml create mode 100644 .github/workflows/oxylabs-sdk-python-karolis.code-workspace diff --git a/.github/workflows/event-listener.yml b/.github/workflows/event-listener.yml new file mode 100644 index 0000000..1134be3 --- /dev/null +++ b/.github/workflows/event-listener.yml @@ -0,0 +1,34 @@ +name: GitBook Event Receiver + +on: + repository_dispatch: + types: [gitbook_update] + +jobs: + process-gitbook-update: + runs-on: ubuntu-latest + steps: + - name: Acknowledge Event + run: | + echo "🚀 Event Received from GitBook!" 
+ echo "Commit: ${{ github.event.client_payload.sha }}" + echo "Author: ${{ github.event.client_payload.author }}" + echo "Summary: ${{ github.event.client_payload.message }}" + + - name: Show Changed Paths + run: | + echo "The following filtered paths were modified:" + echo "${{ github.event.client_payload.changed_paths }}" + + - name: View Detailed Diff + run: | + echo "--- START OF DIFF ---" + echo "${{ github.event.client_payload.diff }}" + echo "--- END OF DIFF ---" + + # EXAMPLE: Action based on specific path + - name: Conditional Logic + if: contains(github.event.client_payload.changed_paths, 'docs/api-reference') + run: | + echo "Detected change in API Reference. Triggering internal sync..." + # Insert your custom command here (e.g., npm run build-docs) \ No newline at end of file diff --git a/.github/workflows/oxylabs-sdk-python-karolis.code-workspace b/.github/workflows/oxylabs-sdk-python-karolis.code-workspace new file mode 100644 index 0000000..4c6c91a --- /dev/null +++ b/.github/workflows/oxylabs-sdk-python-karolis.code-workspace @@ -0,0 +1,13 @@ +{ + "folders": [ + { + "name": "oxylabs-sdk-python-karolis", + "path": "../.." 
+ }, + { + "name": "tests", + "path": "../../../oxylabs-sdk-master-repo/oxylabs-sdk-python/tests" + } + ], + "settings": {} +} \ No newline at end of file From 2caefc067a480962fab7ea42ccc85696697e8edc Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 6 Mar 2026 14:35:39 +0200 Subject: [PATCH 09/14] remove workflows for now: --- .github/workflows/event-listener.yml | 34 ------------------- .../oxylabs-sdk-python-karolis.code-workspace | 13 ------- 2 files changed, 47 deletions(-) delete mode 100644 .github/workflows/event-listener.yml delete mode 100644 .github/workflows/oxylabs-sdk-python-karolis.code-workspace diff --git a/.github/workflows/event-listener.yml b/.github/workflows/event-listener.yml deleted file mode 100644 index 1134be3..0000000 --- a/.github/workflows/event-listener.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: GitBook Event Receiver - -on: - repository_dispatch: - types: [gitbook_update] - -jobs: - process-gitbook-update: - runs-on: ubuntu-latest - steps: - - name: Acknowledge Event - run: | - echo "🚀 Event Received from GitBook!" - echo "Commit: ${{ github.event.client_payload.sha }}" - echo "Author: ${{ github.event.client_payload.author }}" - echo "Summary: ${{ github.event.client_payload.message }}" - - - name: Show Changed Paths - run: | - echo "The following filtered paths were modified:" - echo "${{ github.event.client_payload.changed_paths }}" - - - name: View Detailed Diff - run: | - echo "--- START OF DIFF ---" - echo "${{ github.event.client_payload.diff }}" - echo "--- END OF DIFF ---" - - # EXAMPLE: Action based on specific path - - name: Conditional Logic - if: contains(github.event.client_payload.changed_paths, 'docs/api-reference') - run: | - echo "Detected change in API Reference. Triggering internal sync..." 
- # Insert your custom command here (e.g., npm run build-docs) \ No newline at end of file diff --git a/.github/workflows/oxylabs-sdk-python-karolis.code-workspace b/.github/workflows/oxylabs-sdk-python-karolis.code-workspace deleted file mode 100644 index 4c6c91a..0000000 --- a/.github/workflows/oxylabs-sdk-python-karolis.code-workspace +++ /dev/null @@ -1,13 +0,0 @@ -{ - "folders": [ - { - "name": "oxylabs-sdk-python-karolis", - "path": "../.." - }, - { - "name": "tests", - "path": "../../../oxylabs-sdk-master-repo/oxylabs-sdk-python/tests" - } - ], - "settings": {} -} \ No newline at end of file From 468c36d6ae741f0228ede9d9132dbaf28758c2e2 Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 6 Mar 2026 14:41:55 +0200 Subject: [PATCH 10/14] remove workflows for now: --- .github/workflows/event-listener.yml | 34 ------------------- .../oxylabs-sdk-python-karolis.code-workspace | 13 ------- 2 files changed, 47 deletions(-) delete mode 100644 .github/workflows/event-listener.yml delete mode 100644 .github/workflows/oxylabs-sdk-python-karolis.code-workspace diff --git a/.github/workflows/event-listener.yml b/.github/workflows/event-listener.yml deleted file mode 100644 index 1134be3..0000000 --- a/.github/workflows/event-listener.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: GitBook Event Receiver - -on: - repository_dispatch: - types: [gitbook_update] - -jobs: - process-gitbook-update: - runs-on: ubuntu-latest - steps: - - name: Acknowledge Event - run: | - echo "🚀 Event Received from GitBook!" 
- echo "Commit: ${{ github.event.client_payload.sha }}" - echo "Author: ${{ github.event.client_payload.author }}" - echo "Summary: ${{ github.event.client_payload.message }}" - - - name: Show Changed Paths - run: | - echo "The following filtered paths were modified:" - echo "${{ github.event.client_payload.changed_paths }}" - - - name: View Detailed Diff - run: | - echo "--- START OF DIFF ---" - echo "${{ github.event.client_payload.diff }}" - echo "--- END OF DIFF ---" - - # EXAMPLE: Action based on specific path - - name: Conditional Logic - if: contains(github.event.client_payload.changed_paths, 'docs/api-reference') - run: | - echo "Detected change in API Reference. Triggering internal sync..." - # Insert your custom command here (e.g., npm run build-docs) \ No newline at end of file diff --git a/.github/workflows/oxylabs-sdk-python-karolis.code-workspace b/.github/workflows/oxylabs-sdk-python-karolis.code-workspace deleted file mode 100644 index 4c6c91a..0000000 --- a/.github/workflows/oxylabs-sdk-python-karolis.code-workspace +++ /dev/null @@ -1,13 +0,0 @@ -{ - "folders": [ - { - "name": "oxylabs-sdk-python-karolis", - "path": "../.." 
- }, - { - "name": "tests", - "path": "../../../oxylabs-sdk-master-repo/oxylabs-sdk-python/tests" - } - ], - "settings": {} -} \ No newline at end of file From 8248a1d7ca1c88106af29d3631b6a71460398b47 Mon Sep 17 00:00:00 2001 From: karolispetkevicius-de Date: Fri, 6 Mar 2026 14:56:08 +0200 Subject: [PATCH 11/14] Delete src/oxylabs/sources/youtube_transcript directory ( deprecated) --- .../sources/youtube_transcript/__init__.py | 1 - .../youtube_transcript/youtube_transcript.py | 106 ------------------ 2 files changed, 107 deletions(-) delete mode 100644 src/oxylabs/sources/youtube_transcript/__init__.py delete mode 100644 src/oxylabs/sources/youtube_transcript/youtube_transcript.py diff --git a/src/oxylabs/sources/youtube_transcript/__init__.py b/src/oxylabs/sources/youtube_transcript/__init__.py deleted file mode 100644 index 3e7162c..0000000 --- a/src/oxylabs/sources/youtube_transcript/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .youtube_transcript import YoutubeTranscript, YoutubeTranscriptAsync \ No newline at end of file diff --git a/src/oxylabs/sources/youtube_transcript/youtube_transcript.py b/src/oxylabs/sources/youtube_transcript/youtube_transcript.py deleted file mode 100644 index 610a514..0000000 --- a/src/oxylabs/sources/youtube_transcript/youtube_transcript.py +++ /dev/null @@ -1,106 +0,0 @@ -from typing import Optional - -from oxylabs.internal.api import RealtimeAPI, AsyncAPI -from oxylabs.sources.response import Response -from oxylabs.utils.types import source -from oxylabs.utils.utils import prepare_config - - -class YoutubeTranscript: - def __init__(self, api_instance:RealtimeAPI) -> None: - """ - Initializes an instance of the YoutubeTranscript class. - - Args: - api_instance: An instance of the RealtimeAPI class used for making requests. 
- """ - self._api_instance = api_instance - - def scrape_transcript( - self, - query: str, - context: Optional[list] = None, - callback_url: Optional[str] = None, - request_timeout: Optional[int] = 165, - **kwargs - ) -> Response: - """ - Scrapes a YouTube video transcript for a given query. - - Args: - query (str): A YouTube video ID - context: Optional[list], - callback_url (Optional[str]): URL to your callback endpoint. - request_timeout (int | 165, optional): The interval in seconds for - the request to time out if no response is returned. - Defaults to 165. - - Returns: - Response: The response from the server after the job is completed. - """ - - config = prepare_config(request_timeout=request_timeout) - payload = { - "source": source.YOUTUBE_TRANSCRIPT, - "query": query, - "context": context, - "callback_url": callback_url, - **kwargs, - } - api_response = self._api_instance.get_response(payload, config) - return Response(api_response) - -class YoutubeTranscriptAsync: - def __init__(self, api_instance:AsyncAPI) -> None: - """ - Initializes an instance of the YoutubeTranscriptAsync class. - - Args: - api_instance: An instance of the AsyncAPI class used for making requests. - """ - self._api_instance = api_instance - - async def scrape_transcript( - self, - query: str, - context: Optional[list] = None, - callback_url: Optional[str] = None, - request_timeout: Optional[int] = 165, - job_completion_timeout: Optional[int] = None, - poll_interval: Optional[int] = None, - **kwargs - ) -> Response: - """ - Asynchronously scrapes a YouTube video transcript for a given query. - - Args: - query (str): A YouTube video ID - context: Optional[list], - callback_url (Optional[str]): URL to your callback endpoint. - request_timeout (int | 165, optional): The interval in seconds for - the request to time out if no response is returned. - Defaults to 165. - poll_interval (Optional[int]): The interval in seconds to poll - the server for a response. 
- job_completion_timeout (Optional[int]): The interval in - seconds for the job to time out if no response is returned. - - Returns: - Response: The response from the server after the job is completed. - """ - - config = prepare_config( - request_timeout=request_timeout, - poll_interval=poll_interval, - job_completion_timeout=job_completion_timeout, - async_integration=True, - ) - payload = { - "source": source.YOUTUBE_TRANSCRIPT, - "query": query, - "context": context, - "callback_url": callback_url, - **kwargs, - } - api_response = await self._api_instance.get_response(payload, config) - return Response(api_response) From bb9b110438504c5c9ff6925e5fce320581bfda37 Mon Sep 17 00:00:00 2001 From: karolispetkevicius-de Date: Fri, 6 Mar 2026 16:14:33 +0200 Subject: [PATCH 12/14] "Claude PR Assistant workflow" --- .github/workflows/claude.yml | 50 ++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/claude.yml diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 0000000..79fe056 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - 
name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' + + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + # claude_args: '--allowed-tools Bash(gh pr:*)' + From ca8dc7b112e4c196a60cd0116b8a8ca7f13abf9d Mon Sep 17 00:00:00 2001 From: karolispetkevicius-de Date: Fri, 6 Mar 2026 16:14:34 +0200 Subject: [PATCH 13/14] "Claude Code Review workflow" --- .github/workflows/claude-code-review.yml | 44 ++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/claude-code-review.yml diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml new file mode 100644 index 0000000..4f6145b --- /dev/null +++ b/.github/workflows/claude-code-review.yml @@ -0,0 +1,44 @@ +name: Claude Code Review + +on: + pull_request: + types: [opened, synchronize, ready_for_review, reopened] + # Optional: Only run on specific file changes + # paths: + # - "src/**/*.ts" + # - "src/**/*.tsx" + # - "src/**/*.js" + # - "src/**/*.jsx" + +jobs: + claude-review: + # Optional: Filter by PR author + # if: | + # github.event.pull_request.user.login == 'external-contributor' || + # github.event.pull_request.user.login == 'new-developer' || + # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' + + runs-on: ubuntu-latest + 
permissions: + contents: read + pull-requests: read + issues: read + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code Review + id: claude-review + uses: anthropics/claude-code-action@v1 + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + plugin_marketplaces: 'https://github.com/anthropics/claude-code.git' + plugins: 'code-review@claude-code-plugins' + prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}' + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + From 72d729c8f983032e8bef8af6e871647d95cae319 Mon Sep 17 00:00:00 2001 From: Karolis Petkevicius Date: Fri, 6 Mar 2026 16:42:28 +0200 Subject: [PATCH 14/14] delete workflows 2 --- .github/workflows/claude-code-review.yml | 44 --------------------- .github/workflows/claude.yml | 50 ------------------------ 2 files changed, 94 deletions(-) delete mode 100644 .github/workflows/claude-code-review.yml delete mode 100644 .github/workflows/claude.yml diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml deleted file mode 100644 index 4f6145b..0000000 --- a/.github/workflows/claude-code-review.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Claude Code Review - -on: - pull_request: - types: [opened, synchronize, ready_for_review, reopened] - # Optional: Only run on specific file changes - # paths: - # - "src/**/*.ts" - # - "src/**/*.tsx" - # - "src/**/*.js" - # - "src/**/*.jsx" - -jobs: - claude-review: - # Optional: Filter by PR author - # if: | - # github.event.pull_request.user.login == 'external-contributor' || - # github.event.pull_request.user.login == 'new-developer' || - # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' - - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: read 
- issues: read - id-token: write - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Run Claude Code Review - id: claude-review - uses: anthropics/claude-code-action@v1 - with: - anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - plugin_marketplaces: 'https://github.com/anthropics/claude-code.git' - plugins: 'code-review@claude-code-plugins' - prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}' - # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md - # or https://code.claude.com/docs/en/cli-reference for available options - diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml deleted file mode 100644 index 79fe056..0000000 --- a/.github/workflows/claude.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Claude Code - -on: - issue_comment: - types: [created] - pull_request_review_comment: - types: [created] - issues: - types: [opened, assigned] - pull_request_review: - types: [submitted] - -jobs: - claude: - if: | - (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || - (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || - (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || - (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: read - issues: read - id-token: write - actions: read # Required for Claude to read CI results on PRs - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Run Claude Code - id: claude - uses: anthropics/claude-code-action@v1 - with: - anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} - - # This is an optional setting that allows Claude to read CI results on PRs 
- additional_permissions: | - actions: read - - # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. - # prompt: 'Update the pull request description to include a summary of changes.' - - # Optional: Add claude_args to customize behavior and configuration - # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md - # or https://code.claude.com/docs/en/cli-reference for available options - # claude_args: '--allowed-tools Bash(gh pr:*)' -