Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 10e16a5

Browse files
committed
edited extract youtube data tutorial
1 parent e8c4869 commit 10e16a5

File tree

2 files changed

+30
-16
lines changed

2 files changed

+30
-16
lines changed
Lines changed: 29 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,46 @@
1-
import requests
1+
from requests_html import HTMLSession
22
from bs4 import BeautifulSoup as bs
33

4+
# init session
5+
session=HTMLSession()
6+
47

58
def get_video_info(url):
69
# download HTML code
7-
content=requests.get(url)
10+
response=session.get(url)
11+
# execute Javascript
12+
response.html.render(sleep=1)
813
# create beautiful soup object to parse HTML
9-
soup=bs(content.content,"html.parser")
14+
soup=bs(response.html.html,"html.parser")
15+
# open("index.html", "w").write(response.html.html)
1016
# initialize the result
1117
result= {}
1218
# video title
13-
result['title']=soup.find("span",attrs={"class":"watch-title"}).text.strip()
19+
result["title"]=soup.find("h1").text.strip()
1420
# video views (converted to integer)
15-
result['views']=int(soup.find("div",attrs={"class":"watch-view-count"}).text[:-6].replace(",",""))
21+
result["views"]=int(''.join([c for c in soup.find("span",attrs={"class":"view-count"}).text if c.isdigit() ]))
1622
# video description
17-
result['description']=soup.find("p",attrs={"id":"eow-description"}).text
23+
result["description"]=soup.find("yt-formatted-string",{"class":"content"}).text
1824
# date published
19-
result['date_published']=soup.find("strong",attrs={"class":"watch-time-text"}).text
20-
# number of likes as integer
21-
result['likes']=int(soup.find("button",attrs={"title":"I like this"}).text.replace(",",""))
22-
# number of dislikes as integer
23-
result['dislikes']=int(soup.find("button",attrs={"title":"I dislike this"}).text.replace(",",""))
25+
result["date_published"]=soup.find("div", {"id":"date"}).text[1:]
26+
# get the duration of the video
27+
result["duration"]=soup.find("span", {"class":"ytp-time-duration"}).text
28+
# get the video tags
29+
result["tags"]=', '.join([meta.attrs.get("content") for meta in soup.find_all("meta", {"property":"og:video:tag"}) ])
30+
# number of likes
31+
text_yt_formatted_strings=soup.find_all("yt-formatted-string", {"id":"text","class":"ytd-toggle-button-renderer"})
32+
result["likes"]=int(''.join([c for c in text_yt_formatted_strings[0].attrs.get("aria-label") if c.isdigit() ]))
33+
# number of dislikes
34+
result["dislikes"]=int(''.join([c for c in text_yt_formatted_strings[1].attrs.get("aria-label") if c.isdigit() ]))
35+
2436
# channel details
25-
channel_tag=soup.find("div",attrs={"class":"yt-user-info"}).find("a")
37+
channel_tag=soup.find("yt-formatted-string", {"class":"ytd-channel-name"}).find("a")
2638
# channel name
2739
channel_name=channel_tag.text
2840
# channel URL
2941
channel_url=f"https://www.youtube.com{channel_tag['href']}"
3042
# number of subscribers as str
31-
channel_subscribers=soup.find("span",attrs={"class":"yt-subscriber-count"}).text.strip()
43+
channel_subscribers=soup.find("yt-formatted-string",{"id":"owner-sub-count"}).text.strip()
3244
result['channel']= {'name':channel_name,'url':channel_url,'subscribers':channel_subscribers}
3345
return result
3446

@@ -46,10 +58,12 @@ def get_video_info(url):
4658
# print in nice format
4759
print(f"Title:{data['title']}")
4860
print(f"Views:{data['views']}")
49-
print(f"\nDescription:{data['description']}\n")
50-
print(data['date_published'])
61+
print(f"Published at:{data['date_published']}")
62+
print(f"Video Duration:{data['duration']}")
63+
print(f"Video tags:{data['tags']}")
5164
print(f"Likes:{data['likes']}")
5265
print(f"Dislikes:{data['dislikes']}")
66+
print(f"\nDescription:{data['description']}\n")
5367
print(f"\nChannel Name:{data['channel']['name']}")
5468
print(f"Channel URL:{data['channel']['url']}")
5569
print(f"Channel Subscribers:{data['channel']['subscribers']}")
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
requests
1+
requests_html
22
bs4

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp