Simple Web Scraper Example
#!/usr/bin/env python3
"""
Simple Web Scraper Example

A basic example of extracting data from a website with requests and
BeautifulSoup.
"""

import requests
from bs4 import BeautifulSoup
import json


def _parse_quote(quote):
    """Return a dict with 'quote', 'author' and 'tags' for one quote element.

    Returns None when the element lacks the text or author node, so the
    caller can skip it instead of crashing on ``None.get_text()``.
    """
    text_node = quote.find('span', class_='text')
    author_node = quote.find('small', class_='author')
    if text_node is None or author_node is None:
        return None

    return {
        'quote': text_node.get_text(),
        'author': author_node.get_text(),
        'tags': [tag.get_text() for tag in quote.find_all('a', class_='tag')],
    }


def scrape_quotes():
    """Scrape the quotes listed on the first page of quotes.toscrape.com.

    Each quote is printed to stdout as it is parsed.

    Returns:
        A list of dicts with the keys 'quote', 'author' and 'tags', or
        None when the HTTP request fails.
    """
    url = "http://quotes.toscrape.com/"

    try:
        # Send the request to the website.
        print(f"🔍 Scraping: {url}")
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception.
    except requests.RequestException as e:
        print(f"❌ Error: {e}")
        return None

    # Parse the HTML.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find every quote container on the page.
    quotes = soup.find_all('div', class_='quote')
    print(f"\n✅ Found {len(quotes)} quotes!\n")

    quotes_data = []
    for quote in quotes:
        quote_dict = _parse_quote(quote)
        if quote_dict is None:
            # Markup differs from what we expect; skip rather than abort
            # the whole run.
            print("⚠️ Skipping a quote block with missing text or author")
            continue

        quotes_data.append(quote_dict)

        # Echo the parsed quote.
        print(f"📝 Quote: {quote_dict['quote']}")
        print(f"👤 Author: {quote_dict['author']}")
        print(f"🏷️ Tags: {', '.join(quote_dict['tags'])}")
        print("-" * 80)

    return quotes_data


def scrape_custom_website(url, element_selector):
    """Generic scraper that works against an arbitrary page.

    Prints the page title, the first five h1/h2/h3 headings and, when
    ``element_selector`` is non-empty, the first five elements that match
    it.

    Args:
        url: URL of the page to fetch.
        element_selector: CSS selector or HTML tag name. A falsy value
            (None or "") skips the selector lookup.

    Returns:
        The parsed BeautifulSoup document, or None if anything fails.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Page title.
        title = soup.find('title')
        print(f"📄 Page Title: {title.get_text() if title else 'No title'}")

        # All headings; only the first five are shown.
        headings = soup.find_all(['h1', 'h2', 'h3'])
        print(f"\n📋 Found {len(headings)} headings:")
        for i, heading in enumerate(headings[:5], 1):
            print(f" {i}. {heading.get_text().strip()}")

        # Elements matching the caller's selector. select() accepts both
        # plain tag names and CSS selectors.
        if element_selector:
            matches = soup.select(element_selector)
            print(f"\n🎯 Found {len(matches)} elements matching {element_selector!r}:")
            for i, element in enumerate(matches[:5], 1):
                print(f" {i}. {element.get_text().strip()}")

        return soup

    except Exception as e:
        # Deliberately broad: this helper is best-effort and also has to
        # absorb invalid-selector errors raised by select().
        print(f"❌ Error: {e}")
        return None


def save_to_json(data, filename='scraped_data.json'):
    """Write scraped data to a UTF-8 JSON file.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).
    Failures are reported on stdout instead of being raised.

    Args:
        data: JSON-serialisable object to write.
        filename: Destination path; defaults to 'scraped_data.json'.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"\n💾 Data saved to: {filename}")
    except Exception as e:
        print(f"❌ Error saving file: {e}")


if __name__ == "__main__":
    print("=" * 80)
    print("🕷️ WEB SCRAPER - Python Example")
    print("=" * 80)

    # Example 1: scrape the quotes.
    quotes = scrape_quotes()

    if quotes:
        # Persist them as JSON.
        save_to_json(quotes)

    print("\n" + "=" * 80)
    print("✨ Scraping complete!")
    print("=" * 80)
टिप्पणियाँ
एक टिप्पणी भेजें