1+ from bs4 import BeautifulSoup
2+ import requests
3+ import json
# Template URL for a Goodreads quote-listing page: slot {0} is the tag
# ("emotion") and slot {1} is the page number, filled in by the loop below.
base_url = "https://www.goodreads.com/quotes/tag/{0}?page={1}"
6+
def process(content, emotion):
    """Extract all quotes from one Goodreads quote-listing page.

    Parameters
    ----------
    content : str
        Raw HTML of a ``goodreads.com/quotes/tag/<emotion>`` page.
    emotion : str
        The tag the page was fetched for; recorded with every quote.

    Returns
    -------
    list[dict]
        One ``{"quote": ..., "author": ..., "emotion": ...}`` dict per
        quote found on the page.
    """
    soup = BeautifulSoup(content, features="html5lib")
    # BUG FIX: the original passed attrs as a *set* literal
    # ({"class" ,"quote"}), which bs4 cannot match against -- attrs must be
    # a dict mapping attribute name to expected value. Same fix applied to
    # the two inner find() calls below.
    quotes = []
    for div in soup.find_all("div", attrs={"class": "quote"}):
        q_text = div.find("div", attrs={"class": "quoteText"})
        # The first line of the stripped text is the quote itself; the
        # author/source follows on later lines. The '\n ' split token is
        # kept from the original -- presumably continuation lines are
        # indented in Goodreads' markup; TODO confirm against a live page.
        quote = q_text.text.strip().split('\n ')[0]
        author = q_text.find("span", attrs={"class": "authorOrTitle"}).text.strip()
        quotes.append({"quote": quote, "author": author, "emotion": emotion})
    return quotes
18+
# Tags to scrape; any Goodreads quote tag name can be added here.
emotions = ['friend', 'sad']

quotes = []
for emotion in emotions:
    # Fetch pages 1-4 of each tag. (The original comment claimed "5 pages",
    # but range(1, 5) yields 1..4 -- comment corrected, behavior unchanged.)
    for index in range(1, 5):
        final_url = base_url.format(emotion, index)
        page = requests.get(final_url)
        quotes += process(page.text, emotion)
27+
# Persist everything scraped above as a single JSON array. The encoding is
# made explicit so the script does not depend on the platform default;
# json.dump's default ensure_ascii=True keeps the payload ASCII-safe anyway.
with open('quote.json', 'w', encoding='utf-8') as fp:
    json.dump(quotes, fp)