1- import requests
1+ from requests_html import HTMLSession
22from bs4 import BeautifulSoup as bs
33
# Module-level HTML session; get_video_info() uses it to fetch each page.
session = HTMLSession()
6+
47
def _digits_to_int(text):
    """Return the integer formed by the decimal digits found in *text*.

    Everything that is not a digit (thousands separators, words, spaces) is
    ignored, so "1,234 views" yields 1234. Text without any digit (for
    example "No views") or ``None`` yields 0 rather than raising
    ``ValueError`` from ``int("")``.
    """
    digits = "".join(c for c in (text or "") if c.isdigit())
    return int(digits) if digits else 0


def get_video_info(url):
    """Fetch a video page and return its details as a dictionary.

    The page is downloaded with the module-level ``session``, its Javascript
    is rendered, and the resulting HTML is parsed with BeautifulSoup.

    Args:
        url: Address of the video page to fetch.

    Returns:
        A dict with the keys ``title``, ``views`` (int), ``description``,
        ``date_published``, ``duration``, ``tags`` (comma-separated str),
        ``likes`` (int), ``dislikes`` (int) and ``channel`` (a dict with
        ``name``, ``url`` and ``subscribers``).

    Raises:
        AttributeError: If an expected element is absent, because
            ``soup.find`` returns ``None`` for it.
        IndexError: If fewer than two like/dislike labels are found.
    """
    # download HTML code
    response = session.get(url)
    # execute Javascript so the dynamically inserted elements exist
    response.html.render(sleep=1)
    # create beautiful soup object to parse the rendered HTML
    soup = bs(response.html.html, "html.parser")

    result = {}
    # video title
    result["title"] = soup.find("h1").text.strip()
    # video views (converted to integer)
    result["views"] = _digits_to_int(
        soup.find("span", attrs={"class": "view-count"}).text
    )
    # video description
    result["description"] = soup.find(
        "yt-formatted-string", {"class": "content"}
    ).text
    # date published; the first character of the element text is dropped
    result["date_published"] = soup.find("div", {"id": "date"}).text[1:]
    # duration of the video
    result["duration"] = soup.find("span", {"class": "ytp-time-duration"}).text
    # video tags, skipping meta elements that carry no "content" attribute
    # (a None entry would make str.join raise TypeError)
    tag_values = (
        meta.attrs.get("content")
        for meta in soup.find_all("meta", {"property": "og:video:tag"})
    )
    result["tags"] = ", ".join(value for value in tag_values if value is not None)
    # like / dislike counts are read from the aria-label of the first and
    # second toggle-button labels respectively
    toggle_labels = soup.find_all(
        "yt-formatted-string",
        {"id": "text", "class": "ytd-toggle-button-renderer"},
    )
    result["likes"] = _digits_to_int(toggle_labels[0].attrs.get("aria-label"))
    result["dislikes"] = _digits_to_int(toggle_labels[1].attrs.get("aria-label"))

    # channel details
    channel_tag = soup.find(
        "yt-formatted-string", {"class": "ytd-channel-name"}
    ).find("a")
    # channel name
    channel_name = channel_tag.text
    # channel URL (the href is appended to the site root)
    channel_url = f"https://www.youtube.com{channel_tag['href']}"
    # number of subscribers as str
    channel_subscribers = soup.find(
        "yt-formatted-string", {"id": "owner-sub-count"}
    ).text.strip()
    result["channel"] = {
        "name": channel_name,
        "url": channel_url,
        "subscribers": channel_subscribers,
    }
    return result
3446
@@ -46,10 +58,12 @@ def get_video_info(url):
4658# print in nice format
4759print (f"Title:{ data ['title' ]} " )
4860print (f"Views:{ data ['views' ]} " )
49- print (f"\n Description:{ data ['description' ]} \n " )
50- print (data ['date_published' ])
61+ print (f"Published at:{ data ['date_published' ]} " )
62+ print (f"Video Duration:{ data ['duration' ]} " )
63+ print (f"Video tags:{ data ['tags' ]} " )
5164print (f"Likes:{ data ['likes' ]} " )
5265print (f"Dislikes:{ data ['dislikes' ]} " )
66+ print (f"\n Description:{ data ['description' ]} \n " )
5367print (f"\n Channel Name:{ data ['channel' ]['name' ]} " )
5468print (f"Channel URL:{ data ['channel' ]['url' ]} " )
5569print (f"Channel Subscribers:{ data ['channel' ]['subscribers' ]} " )