NotificationsYou must be signed in to change notification settings
Fork141
Star403

Commit3864480

authored

scraper: Add BrazzersVR Scraper (#1852)

* Add BrazzersVR Scraper
  Appears to currently work. More widespread testing is needed. There was some randomness in which scenes were shown on the index page. The current set of URLs appears to return the correct scenes. The next index page does not appear to show up in the colly request, only in a working browser. Had to resort to checking whether any scene links were available and advancing if true.
* Go fmt & Cleanup
* BrazzersVR v2
  Abandoned the original BrazzersVR scraper in favor of the backend API call, which shares the same API and JSON structure as VirtualPorn. All seems to be in working order. Some of the code for VirtualPorn may still need to be stripped or adjusted based on differing sites. Current things that need to be checked or adjusted: Filenames and Member Link.
* Remove Debugging Prompt
* Go fmt fixes
* Code Optimizations & Bug Fix
  The Origin and Referer headers are important. They should prevent collisions in the API call when both scrapers are run at the same time, i.e. VirtualPorn returning scene data for Brazzers. Also updated for parallel scraper optimization.
* Bug Fix & Code Optimizations
  Changed from one run-on list of function inputs to a struct type. This makes the code cleaner and easier to read. Another attempt at squashing the API returning results that do not belong to the correct studio. This should work, as the code now checks that each result belongs to the correct scraper and ignores the rest. Also changed the scraperID to a code used in the API result.
* Comment cleanup

1 parent 197a91b, commit 3864480 (Copy full SHA for 3864480)

File tree

1 file changed

+65

-24

lines changed

pkg/scrape
- virtualporn.go

1 file changed

+65

-24

lines changed

`‎pkg/scrape/virtualporn.go`

Lines changed: 65 additions & 24 deletions

Original file line number	Diff line number	Diff line change
`@@ -13,33 +13,32 @@ import (`
`13`	`13`	`"github.com/xbapps/xbvr/pkg/models"`
`14`	`14`	`)`
`15`	`15`
`16`		`-funcVirtualPorn(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {`
	`16`	`+funcProject1ServiceAPI(wgmodels.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,siteDatasiteMetaData,limitScrapingbool)error {`
	`17`	`+`
`17`	`18`	`// this scraper is non-standard in that it gathers info via an api rather than scraping html pages`
`18`	`19`	`deferwg.Done()`
`19`		`-scraperID:="bvr"`
`20`		`-siteID:="VirtualPorn"`
`21`		`-logScrapeStart(scraperID,siteID)`
`22`		`-nextApiUrl:=""`
`23`	`20`
`24`		`-siteCollector:=createCollector("virtualporn.com")`
	`21`	`+logScrapeStart(siteData.scraperID,siteData.siteID)`
	`22`	`+nextApiUrl:=""`
	`23`	`+siteCollector:=createCollector(siteData.baseURL)`
`25`	`24`	`apiCollector:=createCollector("site-api.project1service.com")`
`26`	`25`	`offset:=0`
`27`		`-`
`28`	`26`	`apiCollector.OnResponse(func(r*colly.Response) {`
`29`	`27`	`sceneListJson:=gjson.ParseBytes(r.Body)`
`30`	`28`
`31`	`29`	`processScene:=func(scene gjson.Result) {`
`32`	`30`	`sc:= models.ScrapedScene{}`
`33`		`-sc.ScraperID=scraperID`
	`31`	`+sc.ScraperID=siteData.scraperID`
`34`	`32`	`sc.SceneType="VR"`
`35`		`-sc.Studio="BangBros"`
`36`		`-sc.Site=siteID`
	`33`	`+sc.Studio=siteData.studio`
	`34`	`+sc.Site=siteData.siteID`
`37`	`35`	`id:=strconv.Itoa(int(scene.Get("id").Int()))`
`38`		`-sc.SceneID="bvr-"+id`
	`36`	`+sc.SceneID=slugify.Slugify(sc.ScraperID)+"-"+id`
`39`	`37`
`40`	`38`	`sc.Title=scene.Get("title").String()`
`41`		`-sc.HomepageURL="https://virtualporn.com/video/"+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))`
`42`		`-sc.MembersUrl="https://site-ma.virtualporn.com/scene/"+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))`
	`39`	+sc.HomepageURL=siteData.absoluteURL+`video/`+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))
	`40`	`+sc.MembersUrl=siteData.membersURL+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))`
	`41`	`+`
`43`	`42`	`sc.Synopsis=scene.Get("description").String()`
`44`	`43`	`dateParts:=strings.Split(scene.Get("dateReleased").String(),"T")`
`45`	`44`	`sc.Released=dateParts[0]`
`@@ -71,7 +70,7 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out`
`71`	`70`	`ifactor.Get("gender").String()=="female" {`
`72`	`71`	`sc.Cast=append(sc.Cast,name)`
`73`	`72`	`}`
`74`		`-sc.ActorDetails[actor.Get("name").String()]= models.ActorDetails{Source:scraperID+" scrape",ProfileUrl:"https://virtualporn.com/model/"+strconv.Itoa(int(actor.Get("id").Int()))+"/"+slugify.Slugify(name)}`
	`73`	`+sc.ActorDetails[actor.Get("name").String()]= models.ActorDetails{Source:scraperID+" scrape",ProfileUrl:siteData.modelURL+strconv.Itoa(int(actor.Get("id").Int()))+"/"+slugify.Slugify(name)}`
`75`	`74`	`returntrue`
`76`	`75`	`})`
`77`	`76`
`@@ -112,12 +111,15 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out`
`112`	`111`	`scenes:=sceneListJson.Get("result")`
`113`	`112`	`ifstrings.Contains(r.Request.URL.RawQuery,"offset=") {`
`114`	`113`	`scenes.ForEach(func(key,scene gjson.Result)bool {`
`115`		`-// check if we have the scene already`
`116`		`-matches:=funk.Filter(knownScenes,func(sstring)bool {`
`117`		`-returnstrings.Contains(s,scene.Get("id").String())`
`118`		`-})`
`119`		`-iffunk.IsEmpty(matches) {`
`120`		`-processScene(scene)`
	`114`	`+// For some reason, the API will occasionally return results belonging to other studios filter them out`
	`115`	`+ifscene.Get("brand").String()==strings.ToLower(siteData.studio) {`
	`116`	`+// check if we have the scene already`
	`117`	`+matches:=funk.Filter(knownScenes,func(sstring)bool {`
	`118`	`+returnstrings.Contains(s,scene.Get("id").String())`
	`119`	`+})`
	`120`	`+iffunk.IsEmpty(matches) {`
	`121`	`+processScene(scene)`
	`122`	`+}`
`121`	`123`	`}`
`122`	`124`	`returntrue`
`123`	`125`	`})`
`@@ -143,6 +145,8 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out`
`143`	`145`	`// set up api requests to use the token in the Instance Header`
`144`	`146`	`apiCollector.OnRequest(func(r*colly.Request) {`
`145`	`147`	`r.Headers.Set("Instance",token)`
	`148`	`+r.Headers.Set("Referer",siteData.absoluteURL)`
	`149`	`+r.Headers.Set("Origin",siteData.absoluteURL)`
`146`	`150`	`})`
`147`	`151`	`apiCollector.Visit(nextApiUrl)`
`148`	`152`	`}`
`@@ -155,23 +159,60 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out`
`155`	`159`	`id:=urlParts[len(urlParts)-2]`
`156`	`160`	`offset=9999// do read more pages, we only need 1`
`157`	`161`	`nextApiUrl="https://site-api.project1service.com/v2/releases/"+id`
`158`		`-siteCollector.Visit("https://virtualporn.com/videos")`
	`162`	+siteCollector.Visit(siteData.absoluteURL+`videos`)
`159`	`163`
`160`	`164`	`}else {`
`161`	`165`	`// call virtualporn.com, this is just to get the instance token to use the api for this session`
`162`	`166`	`nextApiUrl="https://site-api.project1service.com/v2/releases?type=scene&limit=24&offset="+strconv.Itoa(offset)`
`163`		`-siteCollector.Visit("https://virtualporn.com/videos")`
	`167`	+siteCollector.Visit(siteData.absoluteURL+`videos`)
`164`	`168`	`}`
`165`	`169`
`166`	`170`	`ifupdateSite {`
`167`		`-updateSiteLastUpdate(scraperID)`
	`171`	`+updateSiteLastUpdate(siteData.scraperID)`
`168`	`172`	`}`
`169`		`-logScrapeFinished(scraperID,siteID)`
	`173`	`+logScrapeFinished(siteData.scraperID,siteData.siteID)`
`170`	`174`	`returnnil`
`171`	`175`	`}`
`172`	`176`
	`177`	`+typesiteMetaDatastruct {`
	`178`	`+scraperIDstring`
	`179`	`+siteIDstring`
	`180`	`+modelURLstring`
	`181`	`+absoluteURLstring`
	`182`	`+baseURLstring`
	`183`	`+membersURLstring`
	`184`	`+studiostring`
	`185`	`+}`
	`186`	`+`
	`187`	`+funcVirtualPorn(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {`
	`188`	`+bvrMetaData:=siteMetaData{`
	`189`	`+scraperID:"bvr",`
	`190`	`+siteID:"VirtualPorn",`
	`191`	`+modelURL:"https://virtualporn.com/model/",`
	`192`	`+absoluteURL:"https://virtualporn.com/",`
	`193`	`+baseURL:"virtualporn.com",`
	`194`	+membersURL:`https://site-ma.virtualporn.com/`,
	`195`	`+studio:"BangBros",`
	`196`	`+}`
	`197`	`+returnProject1ServiceAPI(wg,updateSite,knownScenes,out,singleSceneURL,singeScrapeAdditionalInfo,&bvrMetaData,limitScraping)`
	`198`	`+}`
	`199`	`+`
	`200`	`+funcBrazzersVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {`
	`201`	`+zzvrMetaData:=siteMetaData{`
	`202`	`+scraperID:"zzvr",`
	`203`	`+siteID:"BrazzersVR",`
	`204`	`+modelURL:"https://www.brazzersvr.com/pornstar/",`
	`205`	`+absoluteURL:"https://www.brazzersvr.com/",`
	`206`	`+baseURL:"www.brazzersvr.com",`
	`207`	+membersURL:`https://site-ma.brazzersvr.com/`,
	`208`	`+studio:"Brazzers",`
	`209`	`+}`
	`210`	`+returnProject1ServiceAPI(wg,updateSite,knownScenes,out,singleSceneURL,singeScrapeAdditionalInfo,&zzvrMetaData,limitScraping)`
	`211`	`+}`
	`212`	`+`
`173`	`213`	`funcinit() {`
`174`	`214`	`registerScraper("bvr","VirtualPorn","https://images.cn77nd.com/members/bangbros/favicon/apple-icon-60x60.png","virtualporn.com",VirtualPorn)`
	`215`	`+registerScraper("zzvr","BrazzersVR","https://images-assets-ht.project1content.com/BrazzersVR/Common/Favicon/63e2a8fdbdbe16.78976344.jpg","brazzersvr.com",BrazzersVR)`
`175`	`216`	`}`
`176`	`217`
`177`	`218`	`// one off conversion routine called by migrations.go`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit3864480

File tree

1 file changed

1 file changed

`‎pkg/scrape/virtualporn.go`

0 commit comments