Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 81f070e

Browse files
authored
scraper: Add Up Close VR Scraper (#1853)
* WIP Full Site Scrape Only
  Currently scrapes the whole site. Needs logic to prevent rescrapes of scenes already processed. Needs logic to handle single-scene scrapes, which use a completely different request body than the whole-site scrape. The JSON is in the same format for single scenes, so the whole-site logic can be reused for extracting data. Tags need filtering, as "Original Series" and "Adult Time Original" are redundant tags. Code needs to be cleaned and formatted.
* Final
  Works. Tested both single scene and full site. All available data is retrieved. There is a bug when scraping a single scene: the pop-up to save it doesn't show. Unsure whether it is a bug in my XBVR or something in my code.
* Remove Junk lines
* More clean up
* Code Clean Up
1 parent 943283e commit 81f070e

File tree

1 file changed

+158
-0
lines changed

1 file changed

+158
-0
lines changed

‎pkg/scrape/upclosevr.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package scrape
2+
3+
import (
4+
"regexp"
5+
"strconv"
6+
"strings"
7+
"sync"
8+
9+
"github.com/go-resty/resty/v2"
10+
"github.com/gocolly/colly/v2"
11+
"github.com/mozillazg/go-slugify"
12+
"github.com/nleeper/goment"
13+
"github.com/thoas/go-funk"
14+
"github.com/tidwall/gjson"
15+
"github.com/xbapps/xbvr/pkg/models"
16+
)
17+
18+
funcUpCloseVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
19+
// this scraper is non-standard in that it gathers info via an api rather than scraping html pages
20+
deferwg.Done()
21+
scraperID:="upclosevr"
22+
siteID:="UpCloseVR"
23+
logScrapeStart(scraperID,siteID)
24+
25+
siteCollector:=createCollector("www.upclosevr.com")
26+
27+
siteCollector.OnHTML(`script`,func(e*colly.HTMLElement) {
28+
apiKeyRegex:=regexp.MustCompile(`"apiKey":"(.+)"}},"site`)
29+
applicationIDRegex:=regexp.MustCompile(`"applicationID":"(.+)","apiKey`)
30+
apiKey:=apiKeyRegex.FindStringSubmatch(e.Text)
31+
applicationID:=applicationIDRegex.FindStringSubmatch(e.Text)
32+
33+
iflen(apiKey)>0&&len(applicationID)>0 {
34+
pageTotal:=1
35+
client:=resty.New()
36+
37+
forpage:=0;page<pageTotal;page++ {
38+
39+
varpayloadStrstring
40+
ifsingleSceneURL!="" {
41+
tmp:=strings.Split(singleSceneURL,"/")
42+
sceneID:=tmp[len(tmp)-1]
43+
payloadStr=`{"requests":[{"indexName":"all_scenes","params":"clickAnalytics=true&facetFilters=%5B%5B%22availableOnSite%3Aupclosevr%22%5D%2C%5B%22clip_id%3A`+sceneID+`%22%5D%5D&facets=%5B%5D&hitsPerPage=1&tagFilters="}]}`
44+
}else {
45+
payloadStr=`{"requests":[{"indexName":"all_scenes_latest_desc","params":"analytics=true&analyticsTags=%5B%22component%3Asearchlisting%22%2C%22section%3Afreetour%22%2C%22site%3Aupclosevr%22%2C%22context%3Avideos%22%2C%22device%3Adesktop%22%5D&clickAnalytics=true&facetingAfterDistinct=true&facets=%5B%22categories.name%22%5D&filters=(upcoming%3A'0')%20AND%20availableOnSite%3Aupclosevr&highlightPostTag=__%2Fais-highlight__&highlightPreTag=__ais-highlight__&hitsPerPage=60&maxValuesPerFacet=1000&page=`+strconv.Itoa(page)+`&query=&tagFilters="}]}`
46+
}
47+
48+
varpayload=strings.NewReader(payloadStr)
49+
resp,err:=client.R().
50+
SetHeader("Origin","https://www.upclosevr.com").
51+
SetHeader("Referer","https://www.upclosevr.com/").
52+
SetHeader("User-Agent",UserAgent).
53+
SetHeader("x-algolia-api-key",apiKey[1]).
54+
SetHeader("x-algolia-application-id",applicationID[1]).
55+
SetBody(payload).
56+
Post("https://tsmkfa364q-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(4.22.1)%3B%20Browser%3B%20instantsearch.js%20(4.64.3)%3B%20react%20(18.2.0)%3B%20react-instantsearch%20(7.5.5)%3B%20react-instantsearch-core%20(7.5.5)%3B%20JS%20Helper%20(3.16.2)")
57+
58+
iferr!=nil {
59+
log.Errorln("UpCloseVR encourtned an error on the API Call",err)
60+
return
61+
}
62+
63+
// Convert the resp into a json string for gjson usability
64+
jsonString:=resp.String()
65+
66+
// Check to see if there are multiple pages of results
67+
ifpageTotal==1&&singleSceneURL==""&&!limitScraping {
68+
pageTotal=int(gjson.Get(jsonString,"results.0.nbPages").Int())
69+
}
70+
71+
// Make sure we are getting valid response. If the hits array is zero something went wrong
72+
iflen(gjson.Get(jsonString,"results.0.hits").Array())==0 {
73+
log.Errorln("No Results found for UpCloseVR message:",gjson.Get(jsonString,"message").String(),"response code:",gjson.Get(jsonString,"status").String())
74+
}
75+
76+
// iterate over each hit result
77+
fori,_:=rangegjson.Get(jsonString,"results.0.hits").Array() {
78+
queryStr:=`results.0.hits.`+strconv.Itoa(i)
79+
80+
// Check to make sure we don't update scenes we have already collected
81+
sceneID:=gjson.Get(jsonString,queryStr+`.clip_id`).String()
82+
sceneURL:=`https://www.upclosevr.com/en/video/upclosevr/`+gjson.Get(jsonString,queryStr+`.url_title`).String()+`/`+sceneID
83+
if!funk.ContainsString(knownScenes,sceneURL)||singleSceneURL!="" {
84+
85+
sc:= models.ScrapedScene{}
86+
87+
sc.ScraperID=scraperID
88+
sc.SceneType="VR"
89+
sc.Studio=siteID
90+
sc.Site=siteID
91+
sc.SiteID=sceneID
92+
sc.HomepageURL=sceneURL
93+
94+
// Scene ID
95+
sc.SceneID=slugify.Slugify(sc.Site)+"-"+sc.SiteID
96+
97+
// Date
98+
tmpDate,_:=goment.New(gjson.Get(jsonString,queryStr+`.release_date`).String(),"YYYY-MM-DD")
99+
sc.Released=tmpDate.Format("YYYY-MM-DD")
100+
101+
// Cover
102+
sc.Covers=append(sc.Covers,`https://transform.gammacdn.com/movies/`+gjson.Get(jsonString,queryStr+`.pictures.1920x1080`).String())
103+
104+
// Synopsis
105+
sc.Synopsis=strings.TrimSpace(strings.Replace(gjson.Get(jsonString,queryStr+`.description`).String(),"</br></br>"," ",-1))
106+
107+
// Title
108+
sc.Title=strings.TrimSpace(gjson.Get(jsonString,queryStr+`.title`).String())
109+
log.Infoln(`Scraping `+sc.Title)
110+
111+
// Cast - Females Only can be update to include males if wanted
112+
sc.ActorDetails=make(map[string]models.ActorDetails)
113+
fori,name:=rangegjson.Get(jsonString,queryStr+`.female_actors.#.name`).Array() {
114+
sc.Cast=append(sc.Cast,name.String())
115+
116+
actorQuery:=queryStr+`.female_actors.`+strconv.Itoa(i)
117+
118+
sc.ActorDetails[name.String()]= models.ActorDetails{
119+
Source:scraperID+" scrape",
120+
ProfileUrl:`https://www.upclosevr.com/en/pornstar/view/`+gjson.Get(jsonString,actorQuery+`.url_name`).String()+`/`+gjson.Get(jsonString,actorQuery+`.actor_id`).String(),
121+
}
122+
}
123+
124+
// Junk Tags we don't want to add to scene data
125+
skiptags:=map[string]bool{
126+
"Original Series":true,
127+
"Adult Time Original":true,
128+
}
129+
130+
// Tags
131+
for_,name:=rangegjson.Get(jsonString,queryStr+`.categories.#.name`).Array() {
132+
if!skiptags[name.String()] {
133+
sc.Tags=append(sc.Tags,name.String())
134+
}
135+
}
136+
137+
// Duration is in total seconds
138+
sc.Duration=int(gjson.Get(jsonString,queryStr+`.length`).Int())/60
139+
140+
out<-sc
141+
}
142+
}
143+
}
144+
}
145+
})
146+
147+
siteCollector.Visit("https://www.upclosevr.com/en/videos")
148+
149+
ifupdateSite {
150+
updateSiteLastUpdate(scraperID)
151+
}
152+
logScrapeFinished(scraperID,siteID)
153+
returnnil
154+
}
155+
156+
funcinit() {
157+
registerScraper("upclosevr","Up Close VR","https://static01-cms-fame.gammacdn.com/upclosevr/m/3ixx4xg65im880g8/UpClose-VR_Favicon_114x114.png","upclosevr.com",UpCloseVR)
158+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp