Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit3864480

Browse files
authored
scraper: Add BrazzersVR Scraper (#1852)
* Add BrazzersVR Scraper
  Appears to currently work. More widespread testing needed. Had some randomness to what scenes were being shown on the index page. The current set of URLs appears to return the correct scenes. The next index page does not appear to show up in the colly request, only in a working browser. Had to resort to checking if any scene links were available and advancing if true.
* Go fmt & Cleanup
* BrazzersVR v2
  Abandoned the OG BrazzersVR scraper in favor of the backend API call, which shares the same API and JSON structure as VirtualPorn. All seems to be in working order. Some of the code for VirtualPorn may still need to be stripped or adjusted based on differing sites. Current things that need to be checked or adjusted: Filenames and Member Link.
* Remove Debugging Prompt
* Go fmt fixes
* Code Optimizations & Bug Fix
  The Origin and Referer are important. They should prevent the collisions in the API call when both scrapers are run at the same time, AKA VirtualPorn returning scene data for Brazzers. Also updated for parallel scraper optimization.
* Bug Fix & Code Optimizations
  Changed from one run-on function input to using a type struct. Makes the code cleaner and easier to read. Another attempt at squashing the API returning results not belonging to the correct studio. This should work, as the code now checks to ensure the result belongs to the correct scraper and ignores the rest. Also changed the scraperID to a code used in the API result.
* Comment cleanup
1 parent197a91b commit3864480

File tree

1 file changed

+65
-24
lines changed

1 file changed

+65
-24
lines changed

‎pkg/scrape/virtualporn.go

Lines changed: 65 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,32 @@ import (
1313
"github.com/xbapps/xbvr/pkg/models"
1414
)
1515

16-
funcVirtualPorn(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
16+
funcProject1ServiceAPI(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,siteData*siteMetaData,limitScrapingbool)error {
17+
1718
// this scraper is non-standard in that it gathers info via an api rather than scraping html pages
1819
deferwg.Done()
19-
scraperID:="bvr"
20-
siteID:="VirtualPorn"
21-
logScrapeStart(scraperID,siteID)
22-
nextApiUrl:=""
2320

24-
siteCollector:=createCollector("virtualporn.com")
21+
logScrapeStart(siteData.scraperID,siteData.siteID)
22+
nextApiUrl:=""
23+
siteCollector:=createCollector(siteData.baseURL)
2524
apiCollector:=createCollector("site-api.project1service.com")
2625
offset:=0
27-
2826
apiCollector.OnResponse(func(r*colly.Response) {
2927
sceneListJson:=gjson.ParseBytes(r.Body)
3028

3129
processScene:=func(scene gjson.Result) {
3230
sc:= models.ScrapedScene{}
33-
sc.ScraperID=scraperID
31+
sc.ScraperID=siteData.scraperID
3432
sc.SceneType="VR"
35-
sc.Studio="BangBros"
36-
sc.Site=siteID
33+
sc.Studio=siteData.studio
34+
sc.Site=siteData.siteID
3735
id:=strconv.Itoa(int(scene.Get("id").Int()))
38-
sc.SceneID="bvr-"+id
36+
sc.SceneID=slugify.Slugify(sc.ScraperID)+"-"+id
3937

4038
sc.Title=scene.Get("title").String()
41-
sc.HomepageURL="https://virtualporn.com/video/"+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))
42-
sc.MembersUrl="https://site-ma.virtualporn.com/scene/"+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))
39+
sc.HomepageURL=siteData.absoluteURL+`video/`+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))
40+
sc.MembersUrl=siteData.membersURL+id+"/"+slugify.Slugify(strings.ReplaceAll(sc.Title,"'",""))
41+
4342
sc.Synopsis=scene.Get("description").String()
4443
dateParts:=strings.Split(scene.Get("dateReleased").String(),"T")
4544
sc.Released=dateParts[0]
@@ -71,7 +70,7 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
7170
ifactor.Get("gender").String()=="female" {
7271
sc.Cast=append(sc.Cast,name)
7372
}
74-
sc.ActorDetails[actor.Get("name").String()]= models.ActorDetails{Source:scraperID+" scrape",ProfileUrl:"https://virtualporn.com/model/"+strconv.Itoa(int(actor.Get("id").Int()))+"/"+slugify.Slugify(name)}
73+
sc.ActorDetails[actor.Get("name").String()]= models.ActorDetails{Source:scraperID+" scrape",ProfileUrl:siteData.modelURL+strconv.Itoa(int(actor.Get("id").Int()))+"/"+slugify.Slugify(name)}
7574
returntrue
7675
})
7776

@@ -112,12 +111,15 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
112111
scenes:=sceneListJson.Get("result")
113112
ifstrings.Contains(r.Request.URL.RawQuery,"offset=") {
114113
scenes.ForEach(func(key,scene gjson.Result)bool {
115-
// check if we have the scene already
116-
matches:=funk.Filter(knownScenes,func(sstring)bool {
117-
returnstrings.Contains(s,scene.Get("id").String())
118-
})
119-
iffunk.IsEmpty(matches) {
120-
processScene(scene)
114+
// For some reason, the API will occasionally return results belonging to other studios; filter them out
115+
ifscene.Get("brand").String()==strings.ToLower(siteData.studio) {
116+
// check if we have the scene already
117+
matches:=funk.Filter(knownScenes,func(sstring)bool {
118+
returnstrings.Contains(s,scene.Get("id").String())
119+
})
120+
iffunk.IsEmpty(matches) {
121+
processScene(scene)
122+
}
121123
}
122124
returntrue
123125
})
@@ -143,6 +145,8 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
143145
// set up api requests to use the token in the Instance Header
144146
apiCollector.OnRequest(func(r*colly.Request) {
145147
r.Headers.Set("Instance",token)
148+
r.Headers.Set("Referer",siteData.absoluteURL)
149+
r.Headers.Set("Origin",siteData.absoluteURL)
146150
})
147151
apiCollector.Visit(nextApiUrl)
148152
}
@@ -155,23 +159,60 @@ func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
155159
id:=urlParts[len(urlParts)-2]
156160
offset=9999// don't read more pages, we only need 1
157161
nextApiUrl="https://site-api.project1service.com/v2/releases/"+id
158-
siteCollector.Visit("https://virtualporn.com/videos")
162+
siteCollector.Visit(siteData.absoluteURL+`videos`)
159163

160164
}else {
161165
// call virtualporn.com, this is just to get the instance token to use the api for this session
162166
nextApiUrl="https://site-api.project1service.com/v2/releases?type=scene&limit=24&offset="+strconv.Itoa(offset)
163-
siteCollector.Visit("https://virtualporn.com/videos")
167+
siteCollector.Visit(siteData.absoluteURL+`videos`)
164168
}
165169

166170
ifupdateSite {
167-
updateSiteLastUpdate(scraperID)
171+
updateSiteLastUpdate(siteData.scraperID)
168172
}
169-
logScrapeFinished(scraperID,siteID)
173+
logScrapeFinished(siteData.scraperID,siteData.siteID)
170174
returnnil
171175
}
172176

177+
// siteMetaData carries the per-site settings consumed by Project1ServiceAPI,
// so one scraper implementation can serve several sites.
type siteMetaData struct {
	// scraperID is stored on each scene as ScraperID, slugified into the
	// scene ID prefix, and passed to the scrape logging/update helpers.
	scraperID string
	// siteID is stored on each scene as Site and used in scrape log messages.
	siteID string
	// modelURL is the prefix of an actor profile link; "<actor id>/<slug>" is
	// appended to it.
	modelURL string
	// absoluteURL is the site root including scheme and trailing slash. It is
	// the base for scene homepage links and the "videos" page visit, and is
	// sent as the Referer and Origin headers on API requests.
	absoluteURL string
	// baseURL is the bare host name handed to createCollector.
	baseURL string
	// membersURL is the prefix of the members-area scene link;
	// "<scene id>/<slug>" is appended to it.
	membersURL string
	// studio is stored on each scene as Studio; its lower-cased form is
	// compared with the API's "brand" value to drop foreign results.
	studio string
}
186+
187+
funcVirtualPorn(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
188+
bvrMetaData:=siteMetaData{
189+
scraperID:"bvr",
190+
siteID:"VirtualPorn",
191+
modelURL:"https://virtualporn.com/model/",
192+
absoluteURL:"https://virtualporn.com/",
193+
baseURL:"virtualporn.com",
194+
membersURL:`https://site-ma.virtualporn.com/`,
195+
studio:"BangBros",
196+
}
197+
returnProject1ServiceAPI(wg,updateSite,knownScenes,out,singleSceneURL,singeScrapeAdditionalInfo,&bvrMetaData,limitScraping)
198+
}
199+
200+
funcBrazzersVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
201+
zzvrMetaData:=siteMetaData{
202+
scraperID:"zzvr",
203+
siteID:"BrazzersVR",
204+
modelURL:"https://www.brazzersvr.com/pornstar/",
205+
absoluteURL:"https://www.brazzersvr.com/",
206+
baseURL:"www.brazzersvr.com",
207+
membersURL:`https://site-ma.brazzersvr.com/`,
208+
studio:"Brazzers",
209+
}
210+
returnProject1ServiceAPI(wg,updateSite,knownScenes,out,singleSceneURL,singeScrapeAdditionalInfo,&zzvrMetaData,limitScraping)
211+
}
212+
173213
funcinit() {
174214
registerScraper("bvr","VirtualPorn","https://images.cn77nd.com/members/bangbros/favicon/apple-icon-60x60.png","virtualporn.com",VirtualPorn)
215+
registerScraper("zzvr","BrazzersVR","https://images-assets-ht.project1content.com/BrazzersVR/Common/Favicon/63e2a8fdbdbe16.78976344.jpg","brazzersvr.com",BrazzersVR)
175216
}
176217

177218
// one off conversion routine called by migrations.go

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp