Official Python SDK for ScrapeGraphAI

```bash
pip install scrapegraph-py
```

```python
from scrapegraph_py import Client

client = Client(api_key="your-api-key-here")
```

Alternatively, set the `SGAI_API_KEY` environment variable and initialize the client without parameters:

```python
client = Client()
```

```python
response = client.smartscraper(
    website_url="https://example.com",
    user_prompt="Extract the main heading and description"
)
```

| Parameter | Type | Required | Description |
|---|---|---|---|
| website_url | string | Yes | The URL of the webpage that needs to be scraped. |
| user_prompt | string | Yes | A textual description of what you want to achieve. |
| output_schema | object | No | The Pydantic object that describes the structure and format of the response. |
| render_heavy_js | boolean | No | Enable enhanced JavaScript rendering for heavy JS websites (React, Vue, Angular, etc.). Default: False |

Basic Schema Example

```python
from pydantic import BaseModel, Field

class ArticleData(BaseModel):
    title: str = Field(description="The article title")
    author: str = Field(description="The author's name")
    publish_date: str = Field(description="Article publication date")
    content: str = Field(description="Main article content")
    category: str = Field(description="Article category")

response = client.smartscraper(
    website_url="https://example.com/blog/article",
    user_prompt="Extract the article information",
    output_schema=ArticleData
)

print(f"Title: {response.title}")
print(f"Author: {response.author}")
print(f"Published: {response.publish_date}")
```

Advanced Schema Example
```python
from typing import List
from pydantic import BaseModel, Field

class Employee(BaseModel):
    name: str = Field(description="Employee's full name")
    position: str = Field(description="Job title")
    department: str = Field(description="Department name")
    email: str = Field(description="Email address")

class Office(BaseModel):
    location: str = Field(description="Office location/city")
    address: str = Field(description="Full address")
    phone: str = Field(description="Contact number")

class CompanyData(BaseModel):
    name: str = Field(description="Company name")
    description: str = Field(description="Company description")
    industry: str = Field(description="Industry sector")
    founded_year: int = Field(description="Year company was founded")
    employees: List[Employee] = Field(description="List of key employees")
    offices: List[Office] = Field(description="Company office locations")
    website: str = Field(description="Company website URL")

# Extract comprehensive company information
response = client.smartscraper(
    website_url="https://example.com/about",
    user_prompt="Extract detailed company information including employees and offices",
    output_schema=CompanyData
)

# Access nested data
print(f"Company: {response.name}")
print("\nKey Employees:")
for employee in response.employees:
    print(f"- {employee.name} ({employee.position})")

print("\nOffice Locations:")
for office in response.offices:
    print(f"- {office.location}: {office.address}")
```

Enhanced JavaScript Rendering Example
```python
from scrapegraph_py import Client
from pydantic import BaseModel, Field

class ProductInfo(BaseModel):
    name: str = Field(description="Product name")
    price: str = Field(description="Product price")
    description: str = Field(description="Product description")
    availability: str = Field(description="Product availability status")

client = Client(api_key="your-api-key")

# Enable enhanced JavaScript rendering for a React-based e-commerce site
response = client.smartscraper(
    website_url="https://example-react-store.com/products/123",
    user_prompt="Extract product details including name, price, description, and availability",
    output_schema=ProductInfo,
    render_heavy_js=True  # Enable for React/Vue/Angular sites
)

print(f"Product: {response['result']['name']}")
print(f"Price: {response['result']['price']}")
print(f"Available: {response['result']['availability']}")
```

render_heavy_js:

```python
response = client.searchscraper(
    user_prompt="What are the key features and pricing of ChatGPT Plus?"
)
```

| Parameter | Type | Required | Description |
|---|---|---|---|
| user_prompt | string | Yes | A textual description of what you want to achieve. |
| num_results | number | No | Number of websites to search (3-20). Default: 3. |
| extraction_mode | boolean | No | True = AI extraction mode (10 credits/page), False = markdown mode (2 credits/page). Default: True |
| output_schema | object | No | The Pydantic object that describes the structure and format of the response (AI extraction mode only) |

Basic Schema Example

```python
from pydantic import BaseModel, Field
from typing import List

class ProductInfo(BaseModel):
    name: str = Field(description="Product name")
    description: str = Field(description="Product description")
    price: str = Field(description="Product price")
    features: List[str] = Field(description="List of key features")
    availability: str = Field(description="Availability information")

response = client.searchscraper(
    user_prompt="Find information about iPhone 15 Pro",
    output_schema=ProductInfo
)

print(f"Product: {response.name}")
print(f"Price: {response.price}")
print("\nFeatures:")
for feature in response.features:
    print(f"- {feature}")
```

Advanced Schema Example
```python
from typing import List
from pydantic import BaseModel, Field

class MarketPlayer(BaseModel):
    name: str = Field(description="Company name")
    market_share: str = Field(description="Market share percentage")
    key_products: List[str] = Field(description="Main products in market")
    strengths: List[str] = Field(description="Company's market strengths")

class MarketTrend(BaseModel):
    name: str = Field(description="Trend name")
    description: str = Field(description="Trend description")
    impact: str = Field(description="Expected market impact")
    timeframe: str = Field(description="Trend timeframe")

class MarketAnalysis(BaseModel):
    market_size: str = Field(description="Total market size")
    growth_rate: str = Field(description="Annual growth rate")
    key_players: List[MarketPlayer] = Field(description="Major market players")
    trends: List[MarketTrend] = Field(description="Market trends")
    challenges: List[str] = Field(description="Industry challenges")
    opportunities: List[str] = Field(description="Market opportunities")

# Perform comprehensive market research
response = client.searchscraper(
    user_prompt="Analyze the current AI chip market landscape",
    output_schema=MarketAnalysis
)

# Access structured market data
print(f"Market Size: {response.market_size}")
print(f"Growth Rate: {response.growth_rate}")

print("\nKey Players:")
for player in response.key_players:
    print(f"\n{player.name}")
    print(f"Market Share: {player.market_share}")
    print("Key Products:")
    for product in player.key_products:
        print(f"- {product}")

print("\nMarket Trends:")
for trend in response.trends:
    print(f"\n{trend.name}")
    print(f"Impact: {trend.impact}")
    print(f"Timeframe: {trend.timeframe}")
```

Markdown Mode Example
```python
from scrapegraph_py import Client

client = Client(api_key="your-api-key")

# Enable markdown mode for cost-effective content gathering
response = client.searchscraper(
    user_prompt="Latest developments in artificial intelligence",
    num_results=3,
    extraction_mode=False  # Enable markdown mode (2 credits per page vs 10 credits)
)

# Access the raw markdown content
markdown_content = response['markdown_content']
reference_urls = response['reference_urls']

print(f"Markdown content length: {len(markdown_content)} characters")
print(f"Reference URLs: {len(reference_urls)}")

# Process the markdown content
print("Content preview:", markdown_content[:500] + "...")

# Save to file for analysis
with open('ai_research_content.md', 'w', encoding='utf-8') as f:
    f.write(markdown_content)

print("Content saved to ai_research_content.md")
```

```python
response = client.markdownify(
    website_url="https://example.com"
)
```

```python
import asyncio
from scrapegraph_py import AsyncClient

async def main():
    async with AsyncClient() as client:
        response = await client.smartscraper(
            website_url="https://example.com",
            user_prompt="Extract the main content"
        )
        print(response)

asyncio.run(main())
```

```python
client.submit_feedback(
    request_id="your-request-id",
    rating=5,
    feedback_text="Great results!"
)
```

License
Was this page helpful?