Quick Start: Python
Make your first API call with Python and parse the HTML response with BeautifulSoup.
Prerequisites
- Python 3.8+
- A FastWebScraper API key (get one free)
Installation
Install the requests library for HTTP calls and BeautifulSoup for HTML parsing.
pip install requests beautifulsoup4

Sync Scraping (Simple)
The sync endpoint waits for the scrape to complete and returns the HTML in a single response. Best for quick scripts and testing.
# scrape.py
import requests
response = requests.post(
'https://api.fastwebscraper.com',
headers={
'X-API-Key': 'YOUR_API_KEY',
'Content-Type': 'application/json',
},
json={
'url': 'https://example.com',
'mode': 'auto',
},
timeout=60,
)
if response.status_code != 200:
print('Error:', response.json())
exit(1)
result = response.json()
print('Status code:', result['data']['statusCode'])
print('HTML length:', len(result['data']['html']))Async Scraping (Production)
The async endpoint returns a job ID immediately. Poll for the result when it's ready. Use this for production workloads and batch processing.
# scrape_async.py
import requests
import time
# Step 1: Submit the scrape job
submit_response = requests.post(
'https://api.fastwebscraper.com',
headers={
'X-API-Key': 'YOUR_API_KEY',
'Content-Type': 'application/json',
},
json={
'url': 'https://example.com',
'mode': 'auto',
'waitForSelector': '.main-content',
},
)
data = submit_response.json()
job_id = data['data']['jobId']
print(f'Job submitted: {job_id}')
# Step 2: Poll for the result
while True:
time.sleep(2)
status_response = requests.get(
f'https://api.fastwebscraper.com/{job_id}',
headers={'X-API-Key': 'YOUR_API_KEY'},
)
job = status_response.json()
status = job['data']['status']
print(f'Status: {status}')
if status in ('COMPLETED', 'FAILED'):
break
if status == 'COMPLETED':
print(f'HTML length: {len(job["data"]["html"])}')
else:
print(f'Job failed: {job["data"].get("error")}')Parsing HTML with BeautifulSoup
Once you have the HTML, use BeautifulSoup to extract structured data. It provides an intuitive API for searching and navigating HTML documents.
from bs4 import BeautifulSoup
# Assuming 'html' contains the scraped HTML string
soup = BeautifulSoup(html, 'html.parser')
# Extract text content
page_title = soup.find('h1').text.strip()
print(f'Page title: {page_title}')
# Extract all links
links = [
{'text': a.text.strip(), 'href': a['href']}
for a in soup.find_all('a', href=True)
]
print(f'Links found: {len(links)}')
# Extract product data (example)
products = []
for card in soup.select('.product-card'):
products.append({
'name': card.select_one('.product-name').text.strip(),
'price': card.select_one('.product-price').text.strip(),
'url': card.select_one('a')['href'] if card.select_one('a') else None,
})
print(f'Products: {products}')
# Extract table data
table_rows = []
for row in soup.select('table tbody tr'):
cells = [cell.text.strip() for cell in row.find_all('td')]
if cells:
table_rows.append(cells)
print(f'Table rows: {table_rows}')Complete Example: Scrape and Parse
Here's a complete script that scrapes a page and extracts structured data.
# scrape_products.py
import requests
from bs4 import BeautifulSoup
from datetime import datetime
def scrape_and_parse(url: str) -> dict:
"""Scrape a page and extract product data."""
response = requests.post(
'https://api.fastwebscraper.com',
headers={
'X-API-Key': 'YOUR_API_KEY',
'Content-Type': 'application/json',
},
json={
'url': url,
'mode': 'auto',
'waitForSelector': '.product-card',
},
timeout=60,
)
response.raise_for_status()
result = response.json()
# Parse the HTML
soup = BeautifulSoup(result['data']['html'], 'html.parser')
products = []
for card in soup.select('.product-card'):
name_el = card.select_one('.product-name')
price_el = card.select_one('.price')
img_el = card.select_one('img')
link_el = card.select_one('a')
products.append({
'name': name_el.text.strip() if name_el else None,
'price': price_el.text.strip() if price_el else None,
'image': img_el['src'] if img_el else None,
'link': link_el['href'] if link_el else None,
})
return {
'url': url,
'scraped_at': datetime.now().isoformat(),
'products': products,
}
# Usage
if __name__ == '__main__':
import json
data = scrape_and_parse('https://store.example.com/products')
print(json.dumps(data, indent=2))Error Handling
Always handle errors and implement retries for production use.
import time
import requests
def scrape_with_retry(url: str, retries: int = 3) -> dict:
"""Scrape with exponential backoff retry."""
for attempt in range(1, retries + 1):
try:
response = requests.post(
'https://api.fastwebscraper.com',
headers={
'X-API-Key': 'YOUR_API_KEY',
'Content-Type': 'application/json',
},
json={'url': url, 'mode': 'auto'},
timeout=60,
)
if response.status_code == 429:
# Rate limited — wait and retry
delay = 2 ** attempt
print(f'Rate limited. Waiting {delay}s...')
time.sleep(delay)
continue
response.raise_for_status()
return response.json()
except Exception as e:
if attempt == retries:
raise
delay = 2 ** attempt
print(f'Attempt {attempt} failed: {e}. Retrying in {delay}s...')
time.sleep(delay)
raise Exception(f'Failed after {retries} retries')Async with aiohttp (Advanced)
For scraping multiple URLs concurrently, use aiohttp with asyncio.
pip install aiohttp

import asyncio
import aiohttp
async def scrape_url(session: aiohttp.ClientSession, url: str) -> dict:
"""Scrape a single URL asynchronously."""
async with session.post(
'https://api.fastwebscraper.com',
headers={
'X-API-Key': 'YOUR_API_KEY',
'Content-Type': 'application/json',
},
json={'url': url, 'mode': 'auto'},
) as response:
data = await response.json()
return {'url': url, 'jobId': data['data']['jobId']}
async def main():
urls = [
'https://example.com/page/1',
'https://example.com/page/2',
'https://example.com/page/3',
]
async with aiohttp.ClientSession() as session:
tasks = [scrape_url(session, url) for url in urls]
results = await asyncio.gather(*tasks)
for result in results:
print(f"Queued {result['url']} -> Job {result['jobId']}")
asyncio.run(main())Next Steps
- API Reference — all endpoints, parameters, and response schemas
- Async vs Sync Guide — when to use each scraping mode
- Error Handling Guide — production-ready retry patterns
- Use Cases — industry-specific scraping examples