import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
class WebScraper:
    """Paginated scraper that collects quote records and saves them as CSV.

    Pages are requested as ``{base_url}/page/{n}/`` through one shared
    ``requests.Session``; parsed records accumulate in ``self.data`` as
    dicts with the keys ``text``, ``author`` and ``tags``.
    """

    def __init__(self, base_url, delay=1):
        """Set up the HTTP session and an empty result list.

        Args:
            base_url: Site root that page URLs are built from.
            delay: Seconds to sleep after each scraped page.
        """
        self.base_url = base_url
        self.delay = delay
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0'
        })
        self.data = []

    def fetch_page(self, url):
        """Fetch *url* and return it parsed, or ``None`` on a request error.

        Args:
            url: Absolute URL to request (10 second timeout).

        Returns:
            A ``BeautifulSoup`` tree built with ``html.parser``, or ``None``
            when the request raised ``requests.RequestException`` (which
            includes HTTP error statuses via ``raise_for_status``). The
            error is printed, not re-raised.
        """
        # Only the network call can raise RequestException, so keep the
        # try body limited to it.
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            return None
        return BeautifulSoup(response.text, 'html.parser')

    @staticmethod
    def _parse_quote(quote):
        """Turn one ``div.quote`` element into a record dict.

        A missing text or author node yields an empty string instead of
        raising ``AttributeError`` on ``None.text``.
        """
        text_node = quote.find('span', class_='text')
        author_node = quote.find('small', class_='author')
        return {
            'text': text_node.text if text_node is not None else '',
            'author': author_node.text if author_node is not None else '',
            'tags': ', '.join(t.text for t in quote.find_all('a', class_='tag')),
        }

    def scrape_quotes(self, max_pages=None):
        """Scrape quote pages from page 1 onward into ``self.data``.

        Stops at the first page that cannot be fetched or that contains no
        ``div.quote`` elements.

        Args:
            max_pages: Optional upper bound on the number of pages to
                request; ``None`` (the default) means no bound.
        """
        # Avoid a double slash when base_url already ends with "/".
        root = self.base_url.rstrip('/')
        page = 1
        while max_pages is None or page <= max_pages:
            url = f"{root}/page/{page}/"
            print(f"Scraping page {page}...")
            soup = self.fetch_page(url)
            if soup is None:
                break
            quotes = soup.find_all('div', class_='quote')
            if not quotes:
                break
            self.data.extend(self._parse_quote(q) for q in quotes)
            time.sleep(self.delay)  # Be polite to servers!
            page += 1

    def save(self, filename='output.csv'):
        """Write the collected records to *filename* as UTF-8 CSV.

        Args:
            filename: Destination path for the CSV file.

        Returns:
            The ``pandas.DataFrame`` built from ``self.data``.
        """
        df = pd.DataFrame(self.data)
        df.to_csv(filename, index=False, encoding='utf-8')
        print(f"✅ Saved {len(df)} records → {filename}")
        return df
# ─── RUN ────────────────────────────────────────────
# Run the example scrape only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    scraper = WebScraper('https://quotes.toscrape.com')
    scraper.scrape_quotes()
    df = scraper.save('quotes.csv')
    print(df.head())
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
# NOTE(review): WebScraper is already defined earlier in this file. This
# later definition rebinds the name to a class that has only a constructor
# (no fetch_page / scrape_quotes / save). It looks like an accidental
# duplicate paste; confirm and remove one of the two definitions.
class WebScraper:
    """Holds the base URL, request delay, HTTP session and collected data."""

    def __init__(self, base_url, delay=1):
        """Set up the HTTP session and an empty result list.

        Args:
            base_url: Site root that page URLs are built from.
            delay: Delay value stored on the instance as ``self.delay``.
        """
        self.base_url = base_url
        self.delay = delay
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0'
        })
        self.data = []
# ─── RUN ────────────────────────────────────────────
# Script entry point: the guard keeps the scrape from running on import.
if __name__ == '__main__':
    scraper = WebScraper('https://quotes.toscrape.com')
    scraper.scrape_quotes()
    df = scraper.save('quotes.csv')
    print(df.head())