Movatterモバイル変換

■増田納め2023年のマイ日記集計

こんます～

2023年も残すところわずかとなりましたが、皆様方におかれましてはいかがお過ごしでしょうか。

一年間の振り返りなどはされましたでしょうか。

私、一年間に書いた増田を振り返ってみましたところ、

2423件の日記を綴っており、

頂いた総ブクマ数は1893、総トラバ数は1060となりました。

これもひとえに皆様方のご支援ご声援のおかげであります。

本年も大変お世話になりました。

最後に、ポンコツの私がChatGPTの手となり足となり作成した増田集計コードを掲載します。

各日記のURL、タイトル、投稿日時、文字数、被ブクマ数、被トラバ数を取得し、CSVファイルに出力するものです。

お暇な方はお使いください。

それではよいお年をお迎えください。

"""Export statistics about your own Hatena Anonymous Diary entries to CSV.

For every entry found on the selected list pages, the script records the URL,
title, posting time, character count, bookmark count and trackback count, and
appends one row per entry to ``output_file``.

Edit the configuration values below (credentials, user name, page range)
before running the script.
"""
import csv
import os
import re
import time

# Login URL
login_url = 'https://hatelabo.jp/login'

# Login form data (replace the placeholders with your own credentials)
login_data = {
    'key': 'あなたのユーザ名またはメールアドレス',
    'password': 'あなたのパスワード',
    'mode': 'enter',
}

user_name = 'あなたのユーザ名'

# User-Agent header (example: Google Chrome)
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

# Template of one CSV row; its keys define the CSV column order.
item = {
    'url': '',         # entry URL
    'title': '',       # title
    'datetime': '',    # posting date and time
    'characters': '',  # character count
    'bookmark': '',    # number of bookmarks received
    'trackback': '',   # number of trackbacks received
}

# CSV file name
output_file = 'masuda_output.csv'

# Range of list pages to aggregate (both ends inclusive)
page_start = 1
page_end = 3

# Seconds to wait before every HTTP request, to go easy on the servers.
REQUEST_INTERVAL = 3

# First parenthesised number in the footer text, e.g. "(3)".
TRACKBACK_COUNT_RE = re.compile(r'\((\d+)\)')


def format_timestamp(timestamp):
    """Format a ``YYYYMMDDhhmmss`` entry id as ``YYYY/MM/DD hh:mm``."""
    return (
        f"{timestamp[0:4]}/{timestamp[4:6]}/{timestamp[6:8]} "
        f"{timestamp[8:10]}:{timestamp[10:12]}"
    )


def strip_footer(body_text, footer_text):
    """Return *body_text* without a trailing copy of *footer_text*.

    Surrounding whitespace is trimmed. The footer is removed only when it
    appears as a real suffix of the body; ``str.strip(chars)`` must not be
    used for this because it treats its argument as a set of characters and
    would also eat legitimate leading/trailing characters of the body.
    """
    body = body_text.strip()
    footer = footer_text.strip()
    if footer and body.endswith(footer):
        body = body[:-len(footer)].rstrip()
    return body


def count_trackbacks(footer_text):
    """Return the first parenthesised number in *footer_text*, or 0 if none."""
    match = TRACKBACK_COUNT_RE.search(footer_text)
    return int(match.group(1)) if match else 0


def parse_entry(entry):
    """Extract the CSV fields available on the list page for one entry.

    Args:
        entry: a BeautifulSoup tag for one ``div.section`` element.

    Returns:
        ``(timestamp, row)`` where *timestamp* is the entry id taken from the
        heading link and *row* is a copy of ``item`` with every field except
        ``bookmark`` filled in, or ``None`` when the section has no heading
        link and therefore cannot be identified as an entry.
    """
    header = entry.find('h3')
    link = header.find('a') if header is not None else None
    href = link.get('href') if link is not None else None
    if not href:
        return None

    timestamp = href[1:]  # drop the leading "/" of the relative link

    paragraphs = entry.find_all('p')
    # The second-to-last paragraph is taken as the footer line, as in the
    # original script; sections with fewer paragraphs have no footer.
    footer_text = paragraphs[-2].get_text() if len(paragraphs) >= 2 else ''
    body_text = (
        strip_footer(paragraphs[0].get_text(), footer_text) if paragraphs else ''
    )

    # The last character of the heading text is dropped, as the original
    # script did (presumably trailing whitespace -- TODO confirm).
    title = header.get_text()[:-1]
    if title == '■':
        # Untitled entry: use the beginning of the body as the title.
        title = body_text[:35]

    row = dict(item)
    row['url'] = 'https://anond.hatelabo.jp/' + timestamp
    row['title'] = title
    row['datetime'] = format_timestamp(timestamp)
    row['characters'] = len(body_text)
    row['trackback'] = count_trackbacks(footer_text)
    return timestamp, row


def main():
    """Log in, walk the configured list pages and append one CSV row per entry."""
    # Third-party imports are deferred so that the pure helpers above remain
    # importable without these packages installed.
    import requests
    from bs4 import BeautifulSoup

    # Start a session and send the User-Agent with every request.
    session = requests.Session()
    session.headers.update(headers)

    # Log in
    response = session.post(login_url, data=login_data)
    print('login', response.status_code)

    fieldnames = list(item.keys())

    # Write the header row only when the CSV file does not exist yet.
    if not os.path.exists(output_file):
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            csv.DictWriter(file, fieldnames=fieldnames).writeheader()

    for i in range(page_start, page_end + 1):
        time.sleep(REQUEST_INTERVAL)

        # Fetch one page of the user's entry list.
        page = session.get(f'https://anond.hatelabo.jp/{user_name}/?page={i}')
        print(page.url)

        soup = BeautifulSoup(page.content, 'html.parser')
        for entry in soup.find_all('div', class_='section'):
            parsed = parse_entry(entry)
            if parsed is None:
                continue
            timestamp, row = parsed

            time.sleep(REQUEST_INTERVAL)
            bookmark_page = session.get(
                'https://b.hatena.ne.jp/entry/button/'
                f'?url=https%3A%2F%2Fanond.hatelabo.jp%2F{timestamp}'
                '&layout=basic-label-counter&lang=ja'
            )
            soup_b = BeautifulSoup(bookmark_page.content, 'html.parser')
            counter = soup_b.find('a', class_='count')
            try:
                row['bookmark'] = int(counter.get_text().strip())
            except (AttributeError, ValueError):
                # Counter element missing or not numeric: leave the cell
                # blank instead of aborting and losing the rest of the run.
                print('bookmark count unavailable:', row['url'])

            # Append immediately so progress survives an interrupted run.
            with open(output_file, 'a', newline='', encoding='utf-8') as file:
                csv.DictWriter(file, fieldnames=fieldnames).writerow(row)


if __name__ == '__main__':
    main()