|
| 1 | +package scrape |
| 2 | + |
| 3 | +import ( |
| 4 | +"regexp" |
| 5 | +"strconv" |
| 6 | +"strings" |
| 7 | +"sync" |
| 8 | + |
| 9 | +"github.com/go-resty/resty/v2" |
| 10 | +"github.com/gocolly/colly/v2" |
| 11 | +"github.com/mozillazg/go-slugify" |
| 12 | +"github.com/nleeper/goment" |
| 13 | +"github.com/thoas/go-funk" |
| 14 | +"github.com/tidwall/gjson" |
| 15 | +"github.com/xbapps/xbvr/pkg/models" |
| 16 | +) |
| 17 | + |
| 18 | +funcUpCloseVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error { |
| 19 | +// this scraper is non-standard in that it gathers info via an api rather than scraping html pages |
| 20 | +deferwg.Done() |
| 21 | +scraperID:="upclosevr" |
| 22 | +siteID:="UpCloseVR" |
| 23 | +logScrapeStart(scraperID,siteID) |
| 24 | + |
| 25 | +siteCollector:=createCollector("www.upclosevr.com") |
| 26 | + |
| 27 | +siteCollector.OnHTML(`script`,func(e*colly.HTMLElement) { |
| 28 | +apiKeyRegex:=regexp.MustCompile(`"apiKey":"(.+)"}},"site`) |
| 29 | +applicationIDRegex:=regexp.MustCompile(`"applicationID":"(.+)","apiKey`) |
| 30 | +apiKey:=apiKeyRegex.FindStringSubmatch(e.Text) |
| 31 | +applicationID:=applicationIDRegex.FindStringSubmatch(e.Text) |
| 32 | + |
| 33 | +iflen(apiKey)>0&&len(applicationID)>0 { |
| 34 | +pageTotal:=1 |
| 35 | +client:=resty.New() |
| 36 | + |
| 37 | +forpage:=0;page<pageTotal;page++ { |
| 38 | + |
| 39 | +varpayloadStrstring |
| 40 | +ifsingleSceneURL!="" { |
| 41 | +tmp:=strings.Split(singleSceneURL,"/") |
| 42 | +sceneID:=tmp[len(tmp)-1] |
| 43 | +payloadStr=`{"requests":[{"indexName":"all_scenes","params":"clickAnalytics=true&facetFilters=%5B%5B%22availableOnSite%3Aupclosevr%22%5D%2C%5B%22clip_id%3A`+sceneID+`%22%5D%5D&facets=%5B%5D&hitsPerPage=1&tagFilters="}]}` |
| 44 | +}else { |
| 45 | +payloadStr=`{"requests":[{"indexName":"all_scenes_latest_desc","params":"analytics=true&analyticsTags=%5B%22component%3Asearchlisting%22%2C%22section%3Afreetour%22%2C%22site%3Aupclosevr%22%2C%22context%3Avideos%22%2C%22device%3Adesktop%22%5D&clickAnalytics=true&facetingAfterDistinct=true&facets=%5B%22categories.name%22%5D&filters=(upcoming%3A'0')%20AND%20availableOnSite%3Aupclosevr&highlightPostTag=__%2Fais-highlight__&highlightPreTag=__ais-highlight__&hitsPerPage=60&maxValuesPerFacet=1000&page=`+strconv.Itoa(page)+`&query=&tagFilters="}]}` |
| 46 | +} |
| 47 | + |
| 48 | +varpayload=strings.NewReader(payloadStr) |
| 49 | +resp,err:=client.R(). |
| 50 | +SetHeader("Origin","https://www.upclosevr.com"). |
| 51 | +SetHeader("Referer","https://www.upclosevr.com/"). |
| 52 | +SetHeader("User-Agent",UserAgent). |
| 53 | +SetHeader("x-algolia-api-key",apiKey[1]). |
| 54 | +SetHeader("x-algolia-application-id",applicationID[1]). |
| 55 | +SetBody(payload). |
| 56 | +Post("https://tsmkfa364q-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(4.22.1)%3B%20Browser%3B%20instantsearch.js%20(4.64.3)%3B%20react%20(18.2.0)%3B%20react-instantsearch%20(7.5.5)%3B%20react-instantsearch-core%20(7.5.5)%3B%20JS%20Helper%20(3.16.2)") |
| 57 | + |
| 58 | +iferr!=nil { |
| 59 | +log.Errorln("UpCloseVR encourtned an error on the API Call",err) |
| 60 | +return |
| 61 | +} |
| 62 | + |
| 63 | +// Convert the resp into a json string for gjson usability |
| 64 | +jsonString:=resp.String() |
| 65 | + |
| 66 | +// Check to see if there are multiple pages of results |
| 67 | +ifpageTotal==1&&singleSceneURL==""&&!limitScraping { |
| 68 | +pageTotal=int(gjson.Get(jsonString,"results.0.nbPages").Int()) |
| 69 | +} |
| 70 | + |
| 71 | +// Make sure we are getting valid response. If the hits array is zero something went wrong |
| 72 | +iflen(gjson.Get(jsonString,"results.0.hits").Array())==0 { |
| 73 | +log.Errorln("No Results found for UpCloseVR message:",gjson.Get(jsonString,"message").String(),"response code:",gjson.Get(jsonString,"status").String()) |
| 74 | +} |
| 75 | + |
| 76 | +// iterate over each hit result |
| 77 | +fori,_:=rangegjson.Get(jsonString,"results.0.hits").Array() { |
| 78 | +queryStr:=`results.0.hits.`+strconv.Itoa(i) |
| 79 | + |
| 80 | +// Check to make sure we don't update scenes we have already collected |
| 81 | +sceneID:=gjson.Get(jsonString,queryStr+`.clip_id`).String() |
| 82 | +sceneURL:=`https://www.upclosevr.com/en/video/upclosevr/`+gjson.Get(jsonString,queryStr+`.url_title`).String()+`/`+sceneID |
| 83 | +if!funk.ContainsString(knownScenes,sceneURL)||singleSceneURL!="" { |
| 84 | + |
| 85 | +sc:= models.ScrapedScene{} |
| 86 | + |
| 87 | +sc.ScraperID=scraperID |
| 88 | +sc.SceneType="VR" |
| 89 | +sc.Studio=siteID |
| 90 | +sc.Site=siteID |
| 91 | +sc.SiteID=sceneID |
| 92 | +sc.HomepageURL=sceneURL |
| 93 | + |
| 94 | +// Scene ID |
| 95 | +sc.SceneID=slugify.Slugify(sc.Site)+"-"+sc.SiteID |
| 96 | + |
| 97 | +// Date |
| 98 | +tmpDate,_:=goment.New(gjson.Get(jsonString,queryStr+`.release_date`).String(),"YYYY-MM-DD") |
| 99 | +sc.Released=tmpDate.Format("YYYY-MM-DD") |
| 100 | + |
| 101 | +// Cover |
| 102 | +sc.Covers=append(sc.Covers,`https://transform.gammacdn.com/movies/`+gjson.Get(jsonString,queryStr+`.pictures.1920x1080`).String()) |
| 103 | + |
| 104 | +// Synopsis |
| 105 | +sc.Synopsis=strings.TrimSpace(strings.Replace(gjson.Get(jsonString,queryStr+`.description`).String(),"</br></br>"," ",-1)) |
| 106 | + |
| 107 | +// Title |
| 108 | +sc.Title=strings.TrimSpace(gjson.Get(jsonString,queryStr+`.title`).String()) |
| 109 | +log.Infoln(`Scraping `+sc.Title) |
| 110 | + |
| 111 | +// Cast - Females Only can be update to include males if wanted |
| 112 | +sc.ActorDetails=make(map[string]models.ActorDetails) |
| 113 | +fori,name:=rangegjson.Get(jsonString,queryStr+`.female_actors.#.name`).Array() { |
| 114 | +sc.Cast=append(sc.Cast,name.String()) |
| 115 | + |
| 116 | +actorQuery:=queryStr+`.female_actors.`+strconv.Itoa(i) |
| 117 | + |
| 118 | +sc.ActorDetails[name.String()]= models.ActorDetails{ |
| 119 | +Source:scraperID+" scrape", |
| 120 | +ProfileUrl:`https://www.upclosevr.com/en/pornstar/view/`+gjson.Get(jsonString,actorQuery+`.url_name`).String()+`/`+gjson.Get(jsonString,actorQuery+`.actor_id`).String(), |
| 121 | +} |
| 122 | +} |
| 123 | + |
| 124 | +// Junk Tags we don't want to add to scene data |
| 125 | +skiptags:=map[string]bool{ |
| 126 | +"Original Series":true, |
| 127 | +"Adult Time Original":true, |
| 128 | +} |
| 129 | + |
| 130 | +// Tags |
| 131 | +for_,name:=rangegjson.Get(jsonString,queryStr+`.categories.#.name`).Array() { |
| 132 | +if!skiptags[name.String()] { |
| 133 | +sc.Tags=append(sc.Tags,name.String()) |
| 134 | +} |
| 135 | +} |
| 136 | + |
| 137 | +// Duration is in total seconds |
| 138 | +sc.Duration=int(gjson.Get(jsonString,queryStr+`.length`).Int())/60 |
| 139 | + |
| 140 | +out<-sc |
| 141 | +} |
| 142 | +} |
| 143 | +} |
| 144 | +} |
| 145 | +}) |
| 146 | + |
| 147 | +siteCollector.Visit("https://www.upclosevr.com/en/videos") |
| 148 | + |
| 149 | +ifupdateSite { |
| 150 | +updateSiteLastUpdate(scraperID) |
| 151 | +} |
| 152 | +logScrapeFinished(scraperID,siteID) |
| 153 | +returnnil |
| 154 | +} |
| 155 | + |
| 156 | +funcinit() { |
| 157 | +registerScraper("upclosevr","Up Close VR","https://static01-cms-fame.gammacdn.com/upclosevr/m/3ixx4xg65im880g8/UpClose-VR_Favicon_114x114.png","upclosevr.com",UpCloseVR) |
| 158 | +} |