"""Scrape book listings from books.toscrape.com through an authenticated proxy.

For every book on the landing page, prints its title, price, availability,
a truncated description (fetched from the book's own page), and its URL.
"""
import requests
from bs4 import BeautifulSoup

# Proxy authentication information (replace username/password with real credentials).
# NOTE: the credentials must be directly adjacent to the '@' — a space here makes
# the proxy URL invalid.
proxy = {'http': 'http://username:password@gate.smartproxy.com:10000'}
url = 'http://books.toscrape.com/'  # Website to make a GET request to

# Make the GET request to the target URL using the proxy.
# timeout prevents the script from hanging forever on a stalled proxy.
r = requests.get(url, proxies=proxy, timeout=30)
html = BeautifulSoup(r.content, 'html.parser')  # Parse the HTML

# Find all article elements with the class "product_pod" (one per book).
all_books = html.find_all('article', class_='product_pod')

# Loop through each element and find the title, price, availability, and description.
for book in all_books:
    title = book.h3.a['title']
    price = book.find('p', class_='price_color').text
    availability = book.find('p', class_='instock availability').text.strip()
    link_to_book = book.h3.a['href']  # Relative href, e.g. "catalogue/..."
    link = "http://books.toscrape.com/{0}".format(link_to_book)

    # Make a new request to the book's own page, also through the proxy.
    r2 = requests.get(link, proxies=proxy, timeout=30)
    html2 = BeautifulSoup(r2.content, 'html.parser')
    # The description is the first <p> with no class on the detail page.
    description = html2.find('p', class_='').text

    print(title)
    print(price)
    print(availability)
    # Truncate text that is too long (over 150 characters).
    print("{0}...".format(description[:150]))
    print(link)
    print()  # Print an empty line to separate each result