PY

Quick Start: Python

Make your first API call with Python and parse the HTML response with BeautifulSoup.

All requests require a FastWebScraper API key — replace YOUR_API_KEY in the examples below with your own key.

Prerequisites

Installation

Install the requests library for HTTP calls and BeautifulSoup for HTML parsing.

pip install requests beautifulsoup4

Sync Scraping (Simple)

The sync endpoint waits for the scrape to complete and returns the HTML in a single response. Best for quick scripts and testing.

# scrape.py import requests response = requests.post( 'https://api.fastwebscraper.com', headers={ 'X-API-Key': 'YOUR_API_KEY', 'Content-Type': 'application/json', }, json={ 'url': 'https://example.com', 'mode': 'auto', }, timeout=60, ) if response.status_code != 200: print('Error:', response.json()) exit(1) result = response.json() print('Status code:', result['data']['statusCode']) print('HTML length:', len(result['data']['html']))

Async Scraping (Production)

The async endpoint returns a job ID immediately. Poll for the result when it's ready. Use this for production workloads and batch processing.

# scrape_async.py
#
# Asynchronous scrape: submit a job, then poll until it completes.
# Use this pattern for production workloads and batch processing.
import sys
import time

import requests

API_KEY = 'YOUR_API_KEY'  # replace with your real key

# Step 1: Submit the scrape job. The async endpoint answers immediately
# with a job ID instead of waiting for the scrape to finish.
submit_response = requests.post(
    'https://api.fastwebscraper.com',
    headers={
        'X-API-Key': API_KEY,
        'Content-Type': 'application/json',
    },
    json={
        'url': 'https://example.com',
        'mode': 'auto',
        'waitForSelector': '.main-content',
    },
    timeout=30,  # network timeout for the submit call itself
)
data = submit_response.json()
job_id = data['data']['jobId']
print(f'Job submitted: {job_id}')

# Step 2: Poll for the result. The loop is bounded so a job that never
# reaches a terminal state cannot hang the script (60 polls x 2 s ~ 2 min).
status = None
for _ in range(60):
    time.sleep(2)
    status_response = requests.get(
        f'https://api.fastwebscraper.com/{job_id}',
        headers={'X-API-Key': API_KEY},
        timeout=30,
    )
    job = status_response.json()
    status = job['data']['status']
    print(f'Status: {status}')
    if status in ('COMPLETED', 'FAILED'):
        break
else:
    # for/else: only reached when the loop ran out without a break.
    print(f'Timed out waiting for job {job_id}')
    sys.exit(1)

if status == 'COMPLETED':
    print(f'HTML length: {len(job["data"]["html"])}')
else:
    print(f'Job failed: {job["data"].get("error")}')

Parsing HTML with BeautifulSoup

Once you have the HTML, use BeautifulSoup to extract structured data. It provides an intuitive API for searching and navigating HTML documents.

# Parse scraped HTML with BeautifulSoup and pull out structured data.
from bs4 import BeautifulSoup

# Assuming 'html' contains the scraped HTML string
soup = BeautifulSoup(html, 'html.parser')

# Extract text content
page_title = soup.find('h1').text.strip()
print(f'Page title: {page_title}')

# Extract all links (only anchors that actually carry an href)
links = [
    {'text': anchor.text.strip(), 'href': anchor['href']}
    for anchor in soup.find_all('a', href=True)
]
print(f'Links found: {len(links)}')

# Extract product data (example)
products = []
for card in soup.select('.product-card'):
    anchor = card.select_one('a')  # may be absent on some cards
    products.append({
        'name': card.select_one('.product-name').text.strip(),
        'price': card.select_one('.product-price').text.strip(),
        'url': anchor['href'] if anchor else None,
    })
print(f'Products: {products}')

# Extract table data, skipping rows with no <td> cells
table_rows = [
    cells
    for row in soup.select('table tbody tr')
    if (cells := [td.text.strip() for td in row.find_all('td')])
]
print(f'Table rows: {table_rows}')

Complete Example: Scrape and Parse

Here's a complete script that scrapes a page and extracts structured data.

# scrape_products.py
#
# Complete example: scrape a product listing page and extract structured
# data from the returned HTML.
import requests
from bs4 import BeautifulSoup
from datetime import datetime


def _extract_products(soup) -> list:
    """Pull name/price/image/link from each .product-card element.

    Missing sub-elements yield None instead of raising, so a single
    malformed card cannot abort the whole parse.
    """
    products = []
    for card in soup.select('.product-card'):
        name_el = card.select_one('.product-name')
        # '.product-price' matches the selector used by the other parsing
        # examples in this guide (was '.price', which they never use).
        price_el = card.select_one('.product-price')
        img_el = card.select_one('img')
        link_el = card.select_one('a')
        products.append({
            'name': name_el.text.strip() if name_el else None,
            'price': price_el.text.strip() if price_el else None,
            'image': img_el['src'] if img_el else None,
            'link': link_el['href'] if link_el else None,
        })
    return products


def scrape_and_parse(url: str) -> dict:
    """Scrape *url* and extract product data.

    Returns a dict with the source URL, an ISO-8601 timestamp, and the
    list of products found.

    Raises requests.HTTPError on a non-2xx API response (via
    raise_for_status).
    """
    response = requests.post(
        'https://api.fastwebscraper.com',
        headers={
            'X-API-Key': 'YOUR_API_KEY',
            'Content-Type': 'application/json',
        },
        json={
            'url': url,
            'mode': 'auto',
            # Wait until at least one product card has rendered.
            'waitForSelector': '.product-card',
        },
        timeout=60,
    )
    response.raise_for_status()
    result = response.json()

    soup = BeautifulSoup(result['data']['html'], 'html.parser')
    return {
        'url': url,
        'scraped_at': datetime.now().isoformat(),
        'products': _extract_products(soup),
    }


# Usage
if __name__ == '__main__':
    import json

    data = scrape_and_parse('https://store.example.com/products')
    print(json.dumps(data, indent=2))

Error Handling

Always handle errors and implement retries for production use.

import time

import requests


def scrape_with_retry(url: str, retries: int = 3) -> dict:
    """Scrape *url* with exponential-backoff retry.

    Args:
        url: Page to scrape.
        retries: Maximum number of attempts (default 3).

    Returns:
        The parsed JSON response body.

    Raises:
        RuntimeError: if every attempt was rate-limited (HTTP 429).
        requests.RequestException: if the final attempt fails outright.
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.post(
                'https://api.fastwebscraper.com',
                headers={
                    'X-API-Key': 'YOUR_API_KEY',
                    'Content-Type': 'application/json',
                },
                json={'url': url, 'mode': 'auto'},
                timeout=60,
            )
            if response.status_code == 429:
                # Rate limited — wait and retry
                delay = 2 ** attempt
                print(f'Rate limited. Waiting {delay}s...')
                time.sleep(delay)
                continue
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Out of attempts: re-raise the real error rather than
            # hiding it behind a generic message.
            if attempt == retries:
                raise
            delay = 2 ** attempt
            print(f'Attempt {attempt} failed: {e}. Retrying in {delay}s...')
            time.sleep(delay)
    # Reachable only when every attempt took the 429 branch above.
    raise RuntimeError(f'Failed after {retries} retries')

Async with aiohttp (Advanced)

For scraping multiple URLs concurrently, use aiohttp with asyncio.

pip install aiohttp
# Fan out scrape submissions concurrently with asyncio + aiohttp.
import asyncio

import aiohttp


async def scrape_url(session: aiohttp.ClientSession, url: str) -> dict:
    """Scrape a single URL asynchronously."""
    request_headers = {
        'X-API-Key': 'YOUR_API_KEY',
        'Content-Type': 'application/json',
    }
    payload = {'url': url, 'mode': 'auto'}
    async with session.post(
        'https://api.fastwebscraper.com',
        headers=request_headers,
        json=payload,
    ) as response:
        body = await response.json()
        return {'url': url, 'jobId': body['data']['jobId']}


async def main():
    """Submit several pages at once and report their job IDs."""
    urls = [f'https://example.com/page/{n}' for n in range(1, 4)]
    async with aiohttp.ClientSession() as session:
        # One coroutine per URL; gather runs them concurrently on the
        # shared session.
        results = await asyncio.gather(
            *(scrape_url(session, target) for target in urls)
        )
        for result in results:
            print(f"Queued {result['url']} -> Job {result['jobId']}")


asyncio.run(main())

Next Steps