NotificationsYou must be signed in to change notification settings
Fork 141
Star 403

Commit f10bdc9

authored

scraper: add SwallowBay native scraper (#1715)

Co-authored-by: crwxaj <crwxaj>

1 parent 46cca32 · commit f10bdc9 · Copy full SHA for f10bdc9

File tree

2 files changed

+144

-6

lines changed

pkg
- config
  - scrapers.json
- scrape
  - swallowbay.go

2 files changed

+144

-6

lines changed

`‎pkg/config/scrapers.json`

Lines changed: 0 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -367,12 +367,6 @@`
`367`	`367`	`"company":"SuckMeVR",`
`368`	`368`	`"avatar_url":"https://cdn-vr.sexlikereal.com/images/studio_creatives/logotypes/1/403/logo_crop_1657112082.png"`
`369`	`369`	`},`
`370`		`- {`
`371`		`-"url":"https://www.sexlikereal.com/studios/swallowbay",`
`372`		`-"name":"SwallowBay",`
`373`		`-"company":"SwallowBay",`
`374`		`-"avatar_url":"https://mcdn.vrporn.com/files/20210330092926/swallowbay-400x400.jpg"`
`375`		`- },`
`376`	`370`	`{`
`377`	`371`	`"url":"https://www.sexlikereal.com/studios/sweetlonglips",`
`378`	`372`	`"name":"Sweetlonglips",`

`‎pkg/scrape/swallowbay.go`

Lines changed: 144 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,144 @@`
	`1`	`+package scrape`
	`2`	`+`
	`3`	`+import (`
	`4`	`+"encoding/json"`
	`5`	`+"fmt"`
	`6`	`+"regexp"`
	`7`	`+"strconv"`
	`8`	`+"strings"`
	`9`	`+"sync"`
	`10`	`+`
	`11`	`+"github.com/gocolly/colly/v2"`
	`12`	`+"github.com/nleeper/goment"`
	`13`	`+"github.com/thoas/go-funk"`
	`14`	`+"github.com/xbapps/xbvr/pkg/models"`
	`15`	`+)`
	`16`	`+`
	`17`	`+funcSwallowBay(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {`
	`18`	`+deferwg.Done()`
	`19`	`+scraperID:="swallowbay"`
	`20`	`+siteID:="SwallowBay"`
	`21`	`+logScrapeStart(scraperID,siteID)`
	`22`	`+`
	`23`	`+sceneCollector:=createCollector("swallowbay.com")`
	`24`	`+siteCollector:=createCollector("swallowbay.com")`
	`25`	`+`
	`26`	+sceneCollector.OnHTML(`html`,func(e*colly.HTMLElement) {
	`27`	`+sc:= models.ScrapedScene{}`
	`28`	`+sc.ScraperID=scraperID`
	`29`	`+sc.SceneType="VR"`
	`30`	`+sc.Studio="SwallowBay"`
	`31`	`+sc.Site=siteID`
	`32`	`+sc.SiteID=""`
	`33`	`+sc.HomepageURL=e.Request.URL.String()`
	`34`	`+`
	`35`	+regexpSceneID:=regexp.MustCompile(`\-(\d+)\.html$`)
	`36`	`+sc.SiteID=regexpSceneID.FindStringSubmatch(e.Request.URL.Path)[1]`
	`37`	`+`
	`38`	`+// Title`
	`39`	+e.ForEach(`div.content-title h1`,func(idint,e*colly.HTMLElement) {
	`40`	`+sc.Title=strings.TrimSpace(e.Text)`
	`41`	`+})`
	`42`	`+`
	`43`	`+// Cover URLs`
	`44`	+e.ForEach(`dl8-video`,func(idint,e*colly.HTMLElement) {
	`45`	`+coverUrl:=e.Attr("poster")`
	`46`	`+sc.Covers=append(sc.Covers,coverUrl)`
	`47`	`+})`
	`48`	`+`
	`49`	`+// Cast`
	`50`	`+sc.ActorDetails=make(map[string]models.ActorDetails)`
	`51`	+e.ForEach(`div.content-models a`,func(idint,e*colly.HTMLElement) {
	`52`	`+ifstrings.TrimSpace(e.Text)!="" {`
	`53`	`+sc.Cast=append(sc.Cast,strings.TrimSpace(e.Attr("title")))`
	`54`	`+sc.ActorDetails[strings.TrimSpace(e.Text)]= models.ActorDetails{Source:sc.ScraperID+" scrape",ProfileUrl:e.Attr("href")}`
	`55`	`+}`
	`56`	`+})`
	`57`	`+`
	`58`	`+// Tags`
	`59`	`+ignoreTags:= []string{"vr 180","vr 6k","8k","iphone","ultra high definition"}`
	`60`	+e.ForEach(`div.content-tags a`,func(idint,e*colly.HTMLElement) {
	`61`	`+tag:=strings.ToLower(strings.TrimSpace(e.Text))`
	`62`	`+iftag!="" {`
	`63`	`+for_,v:=rangeignoreTags {`
	`64`	`+iftag==v {`
	`65`	`+return`
	`66`	`+}`
	`67`	`+}`
	`68`	`+sc.Tags=append(sc.Tags,tag)`
	`69`	`+}`
	`70`	`+})`
	`71`	`+`
	`72`	`+// Synposis`
	`73`	+e.ForEach(`div.content-desc.active`,func(idint,e*colly.HTMLElement) {
	`74`	`+sc.Synopsis=strings.TrimSpace(strings.TrimSpace(e.Text))`
	`75`	`+})`
	`76`	`+`
	`77`	`+// Release date`
	`78`	+e.ForEach(`div.content-data div.content-date`,func(idint,e*colly.HTMLElement) {
	`79`	`+date:=strings.TrimSuffix(e.Text,"Date: ")`
	`80`	`+tmpDate,_:=goment.New(strings.TrimSpace(date),"Do MMM, YYYY:")`
	`81`	`+sc.Released=tmpDate.Format("YYYY-MM-DD")`
	`82`	`+})`
	`83`	`+`
	`84`	`+// Duration`
	`85`	+e.ForEach(`div.content-data div.content-time`,func(idint,e*colly.HTMLElement) {
	`86`	`+parts:=strings.Split(e.Text,":")`
	`87`	`+iflen(parts)>1 {`
	`88`	`+tmpDuration,err:=strconv.Atoi(strings.TrimSpace(parts[1]))`
	`89`	`+iferr==nil {`
	`90`	`+sc.Duration=tmpDuration`
	`91`	`+}`
	`92`	`+}`
	`93`	`+})`
	`94`	`+`
	`95`	`+// No filename information yet`
	`96`	`+`
	`97`	`+sc.TrailerType="urls"`
	`98`	`+vartrailers []models.VideoSource`
	`99`	+e.ForEach(`dl8-video source`,func(idint,e*colly.HTMLElement) {
	`100`	`+trailers=append(trailers, models.VideoSource{URL:e.Attr("src"),Quality:strings.TrimSpace(e.Attr("quality"))})`
	`101`	`+})`
	`102`	`+trailerJson,_:=json.Marshal(models.VideoSourceResponse{VideoSources:trailers})`
	`103`	`+sc.TrailerSrc=string(trailerJson)`
	`104`	`+`
	`105`	`+ifsc.SiteID!="" {`
	`106`	`+sc.SceneID=fmt.Sprintf("swallowbay-%v",sc.SiteID)`
	`107`	`+`
	`108`	`+// save only if we got a SceneID`
	`109`	`+out<-sc`
	`110`	`+}`
	`111`	`+})`
	`112`	`+`
	`113`	+siteCollector.OnHTML(`div.pagination a`,func(e*colly.HTMLElement) {
	`114`	`+if!limitScraping {`
	`115`	`+pageURL:=e.Request.AbsoluteURL(e.Attr("href"))`
	`116`	`+siteCollector.Visit(pageURL)`
	`117`	`+}`
	`118`	`+})`
	`119`	`+`
	`120`	+siteCollector.OnHTML(`div.-video div.item-name a`,func(e*colly.HTMLElement) {
	`121`	`+sceneURL:=e.Request.AbsoluteURL(e.Attr("href"))`
	`122`	`+`
	`123`	`+// If scene exist in database, there's no need to scrape`
	`124`	`+if!funk.ContainsString(knownScenes,sceneURL) {`
	`125`	`+sceneCollector.Visit(sceneURL)`
	`126`	`+}`
	`127`	`+})`
	`128`	`+`
	`129`	`+ifsingleSceneURL!="" {`
	`130`	`+sceneCollector.Visit(singleSceneURL)`
	`131`	`+}else {`
	`132`	`+siteCollector.Visit("https://swallowbay.com/most-recent/")`
	`133`	`+}`
	`134`	`+`
	`135`	`+ifupdateSite {`
	`136`	`+updateSiteLastUpdate(scraperID)`
	`137`	`+}`
	`138`	`+logScrapeFinished(scraperID,siteID)`
	`139`	`+returnnil`
	`140`	`+}`
	`141`	`+`
	`142`	`+funcinit() {`
	`143`	`+registerScraper("swallowbay","SwallowBay","https://swallowbay.com/templates/swallowbay/images/favicons/apple-icon-180x180.png","swallowbay.com",SwallowBay)`
	`144`	`+}`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit f10bdc9

File tree

2 files changed

2 files changed

`‎pkg/config/scrapers.json`

`‎pkg/scrape/swallowbay.go`

0 commit comments