- Notifications
You must be signed in to change notification settings - Fork 108
Add Media Indonesia Publisher #804
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:master
Are you sure you want to change the base?
Uh oh!
There was an error while loading. Please reload this page.
Changes from all commits
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
- Loading branch information
Uh oh!
There was an error while loading. Please reload this page.
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| from fundus.publishers.base_objects import Publisher, PublisherGroup | ||
| from fundus.publishers.id.media_indonesia import MediaIndonesiaParser | ||
| from fundus.scraping.filter import inverse, regex_filter | ||
| from fundus.scraping.url import Sitemap | ||
| class ID(metaclass=PublisherGroup): | ||
| default_language = "id" | ||
| MediaIndonesia = Publisher( | ||
| name="Media Indonesia", | ||
| domain="https://www.mediaindonesia.com/", | ||
| parser=MediaIndonesiaParser, | ||
| sources=[Sitemap("https://mediaindonesia.com/sitemap.xml")], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| import datetime | ||
| from typing import List, Optional | ||
| from lxml.cssselect import CSSSelector | ||
| from fundus.parser import BaseParser, ParserProxy | ||
| from fundus.parser.base_parser import attribute | ||
| from fundus.parser.data import ArticleBody | ||
| from fundus.parser.utility import ( | ||
| extract_article_body_with_selector, | ||
| generic_author_parsing, | ||
| generic_date_parsing, | ||
| generic_topic_parsing, | ||
| image_extraction, | ||
| ) | ||
| class MediaIndonesiaParser(ParserProxy): | ||
| class V1(BaseParser): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
| ||
| _paragraph_selector = CSSSelector("div.article") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This paragraph selector selects the entire article as one big paragraph. You should consider something like | ||
| _subheadline_selector = CSSSelector("div.article > h2") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This article uses different formatting for the subheadlines. | ||
| @attribute | ||
| def title(self) -> Optional[str]: | ||
| return self.precomputed.ld.bf_search("headline") | ||
| @attribute | ||
| def body(self) -> Optional[ArticleBody]: | ||
| return extract_article_body_with_selector( | ||
| self.precomputed.doc, | ||
| subheadline_selector=self._subheadline_selector, | ||
| paragraph_selector=self._paragraph_selector, | ||
| ) | ||
| @attribute | ||
| def authors(self) -> List[str]: | ||
| return generic_author_parsing(self.precomputed.ld.bf_search("author")) | ||
| @attribute | ||
| def publishing_date(self) -> Optional[datetime.datetime]: | ||
| return generic_date_parsing(self.precomputed.ld.bf_search("datePublished")) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| { | ||
| "V1": { | ||
| "authors": [ | ||
| "Andhika" | ||
| ], | ||
| "body": { | ||
| "summary": [], | ||
| "sections": [ | ||
| { | ||
| "headline": [], | ||
| "paragraphs": [ | ||
| "Kondisi Tambang di Raja Ampat(Auriga Nusantara) Komisi Pemberantasan Korupsi (KPK) mengaku hingga saat ini belum menemukan surat keputusan (SK) resmi terkait pencabutan empat izin usaha pertambangan (IUP) nikel di Raja Ampat, Papua Barat Daya. Padahal, pemerintah sudah mengumumkan pencabutan izin pada Juni 2025. Kepala Satuan Tugas Koordinasi dan Supervisi KPK Wilayah V, Dian Patria, mengatakan pihaknya masih mencari kejelasan mengenai dokumen resmi pencabutan tersebut. “Dicabut di Istana Negara bulan Juni, tapi terus terang sampai detik ini kami belum pernah melihat SK pencabutannya,” ujar Dian di Gedung Merah Putih KPK, Jakarta, Selasa (21/10). Dian menjelaskan bahwa tim KPK telah menelusuri ke sejumlah kementerian, termasuk Kementerian ESDM dan Kementerian Investasi/Badan Koordinasi Penanaman Modal (BKPM), namun belum memperoleh dokumen yang dimaksud.Baca juga : Hasil Penyelidikan Tambang Nikel Raja Ampat Segera Dirilis “Kami tanya ke Minerba, jawabnya di BKPM. Kami tanya ke BKPM, katanya belum ada surat dari Minerba. Setelah dicek ulang, katanya surat sudah masuk dan sedang diproses,” paparnya. Ia pun mempertanyakan keseriusan pemerintah dalam menindaklanjuti pencabutan empat IUP tambang nikel Raja Ampat yang sempat diumumkan secara publik. “Apakah pemerintah benar-benar serius mencabut empat IUP di Raja Ampat yang diumumkan di Istana Negara? Karena sampai sekarang tidak ada dokumennya sama sekali,” tegas Dian.Baca juga : Pengamat Soroti Peredaran Gambar AI Raja Ampat, Partisipasi Publik yang Sehat Harus Dilandasi Fakta Meski demikian, KPK memastikan tidak ada aktivitas pertambangan di empat lokasi tersebut berdasarkan hasil pemantauan lapangan. Empat perusahaan yang izin usahanya dicabut adalah PT Anugerah Surya Pratama, PT Nurham, PT Mulia Raymond Perkasa, dan PT Kawei Sejahtera Mining. Pencabutan dilakukan karena perusahaan-perusahaan itu terbukti melakukan pelanggaran lingkungan di kawasan geowisata dan geopark Raja Ampat. 
Sebelumnya, Menteri ESDM menyebut langkah pencabutan IUP tambang Raja Ampat merupakan bagian dari upaya menjaga kawasan geopark Raja Ampat agar tidak rusak akibat aktivitas tambang, sekaligus memastikan pengelolaan sumber daya alam tetap berkelanjutan. Cek berita dan artikel yg lain di Google News dan dan ikuti WhatsApp channel mediaindonesia.com Editor : Andhika" | ||
| ] | ||
| } | ||
| ] | ||
| }, | ||
| "publishing_date": "2025-10-22 06:59:00+07:00", | ||
| "title": "KPK tak Temukan Surat Pencabutan IUP Nikel Raja Ampat Apa Iya Betul Dicabut" | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| { | ||
| "MediaIndonesia_2025_10_22.html.gz": { | ||
| "url": "https://mediaindonesia.com/ekonomi/822959/kpk-tak-temukan-surat-pencabutan-iup-nikel-raja-ampat-apa-iya-betul-dicabut", | ||
| "crawl_date": "2025-10-22 02:09:31.055285" | ||
| } | ||
| } |