PY

Quick Start: Python

Make your first API call with Python and parse the HTML response with BeautifulSoup.

All requests require a FastWebScraper API key — replace YOUR_API_KEY in the examples below with your own key.

Prerequisites

Installation

Install the requests library for HTTP calls and BeautifulSoup for HTML parsing.

pip install requests beautifulsoup4

Sync Scraping (Simple)

The sync endpoint waits for the scrape to complete and returns the HTML in a single response. Best for quick scripts and testing.

# scrape.py import requests response = requests.post( 'https://api.fastwebscraper.com', headers={ 'X-API-Key': 'YOUR_API_KEY', 'Content-Type': 'application/json', }, json={ 'url': 'https://example.com', 'mode': 'auto', }, timeout=60, ) if response.status_code != 200: print('Error:', response.json()) exit(1) result = response.json() print('Status code:', result['data']['statusCode']) print('HTML length:', len(result['data']['html']))

Async Scraping (Production)

The async endpoint returns a job ID immediately. Poll for the result when it's ready. Use this for production workloads and batch processing.

# scrape_async.py
#
# Asynchronous scrape: submit a job, then poll until it completes.
# Use this pattern for production workloads and batch processing.
import sys
import time

import requests

API_KEY = 'YOUR_API_KEY'  # replace with your real key

# Step 1: Submit the scrape job. The async endpoint answers immediately
# with a job ID instead of waiting for the scrape to finish.
submit_response = requests.post(
    'https://api.fastwebscraper.com',
    headers={
        'X-API-Key': API_KEY,
        'Content-Type': 'application/json',
    },
    json={
        'url': 'https://example.com',
        'mode': 'auto',
        'waitForSelector': '.main-content',
    },
    timeout=30,  # network timeout for the submit call itself
)
data = submit_response.json()
job_id = data['data']['jobId']
print(f'Job submitted: {job_id}')

# Step 2: Poll for the result. The loop is bounded so a job that never
# reaches a terminal state cannot hang the script (60 polls x 2 s ~ 2 min).
status = None
for _ in range(60):
    time.sleep(2)
    status_response = requests.get(
        f'https://api.fastwebscraper.com/{job_id}',
        headers={'X-API-Key': API_KEY},
        timeout=30,
    )
    job = status_response.json()
    status = job['data']['status']
    print(f'Status: {status}')
    if status in ('COMPLETED', 'FAILED'):
        break
else:
    # for/else: only reached when the loop ran out without a break.
    print(f'Timed out waiting for job {job_id}')
    sys.exit(1)

if status == 'COMPLETED':
    print(f'HTML length: {len(job["data"]["html"])}')
else:
    print(f'Job failed: {job["data"].get("error")}')

Parsing HTML with BeautifulSoup

Once you have the HTML, use BeautifulSoup to extract structured data. It provides an intuitive API for searching and navigating HTML documents.

# Parse scraped HTML with BeautifulSoup and pull out structured data.
from bs4 import BeautifulSoup

# Assuming 'html' contains the scraped HTML string
soup = BeautifulSoup(html, 'html.parser')

# Extract text content
page_title = soup.find('h1').text.strip()
print(f'Page title: {page_title}')

# Extract all links (only anchors that actually carry an href)
links = [
    {'text': anchor.text.strip(), 'href': anchor['href']}
    for anchor in soup.find_all('a', href=True)
]
print(f'Links found: {len(links)}')

# Extract product data (example)
products = []
for card in soup.select('.product-card'):
    anchor = card.select_one('a')  # may be absent on some cards
    products.append({
        'name': card.select_one('.product-name').text.strip(),
        'price': card.select_one('.product-price').text.strip(),
        'url': anchor['href'] if anchor else None,
    })
print(f'Products: {products}')

# Extract table data, skipping rows with no <td> cells
table_rows = [
    cells
    for row in soup.select('table tbody tr')
    if (cells := [td.text.strip() for td in row.find_all('td')])
]
print(f'Table rows: {table_rows}')

Complete Example: Scrape and Parse

Here's a complete script that scrapes a page and extracts structured data.

# scrape_products.py
#
# Complete example: scrape a product listing page and extract structured
# data from the returned HTML.
import requests
from bs4 import BeautifulSoup
from datetime import datetime


def _extract_products(soup) -> list:
    """Pull name/price/image/link from each .product-card element.

    Missing sub-elements yield None instead of raising, so a single
    malformed card cannot abort the whole parse.
    """
    products = []
    for card in soup.select('.product-card'):
        name_el = card.select_one('.product-name')
        # '.product-price' matches the selector used by the other parsing
        # examples in this guide (was '.price', which they never use).
        price_el = card.select_one('.product-price')
        img_el = card.select_one('img')
        link_el = card.select_one('a')
        products.append({
            'name': name_el.text.strip() if name_el else None,
            'price': price_el.text.strip() if price_el else None,
            'image': img_el['src'] if img_el else None,
            'link': link_el['href'] if link_el else None,
        })
    return products


def scrape_and_parse(url: str) -> dict:
    """Scrape *url* and extract product data.

    Returns a dict with the source URL, an ISO-8601 timestamp, and the
    list of products found.

    Raises requests.HTTPError on a non-2xx API response (via
    raise_for_status).
    """
    response = requests.post(
        'https://api.fastwebscraper.com',
        headers={
            'X-API-Key': 'YOUR_API_KEY',
            'Content-Type': 'application/json',
        },
        json={
            'url': url,
            'mode': 'auto',
            # Wait until at least one product card has rendered.
            'waitForSelector': '.product-card',
        },
        timeout=60,
    )
    response.raise_for_status()
    result = response.json()

    soup = BeautifulSoup(result['data']['html'], 'html.parser')
    return {
        'url': url,
        'scraped_at': datetime.now().isoformat(),
        'products': _extract_products(soup),
    }


# Usage
if __name__ == '__main__':
    import json

    data = scrape_and_parse('https://store.example.com/products')
    print(json.dumps(data, indent=2))

Error Handling

Always handle errors and implement retries for production use.

import time

import requests


def scrape_with_retry(url: str, retries: int = 3) -> dict:
    """Scrape *url* with exponential-backoff retry.

    Args:
        url: Page to scrape.
        retries: Maximum number of attempts (default 3).

    Returns:
        The parsed JSON response body.

    Raises:
        RuntimeError: if every attempt was rate-limited (HTTP 429).
        requests.RequestException: if the final attempt fails outright.
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.post(
                'https://api.fastwebscraper.com',
                headers={
                    'X-API-Key': 'YOUR_API_KEY',
                    'Content-Type': 'application/json',
                },
                json={'url': url, 'mode': 'auto'},
                timeout=60,
            )
            if response.status_code == 429:
                # Rate limited — wait and retry
                delay = 2 ** attempt
                print(f'Rate limited. Waiting {delay}s...')
                time.sleep(delay)
                continue
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Out of attempts: re-raise the real error rather than
            # hiding it behind a generic message.
            if attempt == retries:
                raise
            delay = 2 ** attempt
            print(f'Attempt {attempt} failed: {e}. Retrying in {delay}s...')
            time.sleep(delay)
    # Reachable only when every attempt took the 429 branch above.
    raise RuntimeError(f'Failed after {retries} retries')

Async with aiohttp (Advanced)

For scraping multiple URLs concurrently, use aiohttp with asyncio.

pip install aiohttp
# Fan out scrape submissions concurrently with asyncio + aiohttp.
import asyncio

import aiohttp


async def scrape_url(session: aiohttp.ClientSession, url: str) -> dict:
    """Scrape a single URL asynchronously."""
    request_headers = {
        'X-API-Key': 'YOUR_API_KEY',
        'Content-Type': 'application/json',
    }
    payload = {'url': url, 'mode': 'auto'}
    async with session.post(
        'https://api.fastwebscraper.com',
        headers=request_headers,
        json=payload,
    ) as response:
        body = await response.json()
        return {'url': url, 'jobId': body['data']['jobId']}


async def main():
    """Submit several pages at once and report their job IDs."""
    urls = [f'https://example.com/page/{n}' for n in range(1, 4)]
    async with aiohttp.ClientSession() as session:
        # One coroutine per URL; gather runs them concurrently on the
        # shared session.
        results = await asyncio.gather(
            *(scrape_url(session, target) for target in urls)
        )
        for result in results:
            print(f"Queued {result['url']} -> Job {result['jobId']}")


asyncio.run(main())

Next Steps