Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit deb9b2e

Browse files
authored
scraper: Fix VRSpy Scene & Actor scrapers (#1808)
1 parent 49658d4, commit deb9b2e

File tree

2 files changed

+31
-57
lines changed

2 files changed

+31
-57
lines changed

‎pkg/models/model_external_reference.go

Lines changed: 11 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ import (
1010
"time"
1111

1212
"github.com/avast/retry-go/v4"
13-
"github.com/gocolly/colly/v2"
1413
"github.com/markphelps/optional"
1514

1615
"github.com/xbapps/xbvr/pkg/common"
@@ -962,31 +961,17 @@ func (scrapeRules ActorScraperConfig) buildGenericActorScraperRules() {
962961

963962
siteDetails=GenericScraperRuleSet{}
964963
siteDetails.Domain="vrspy.com"
965-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"biography",Selector:`.star-biography-description`})
966-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"image_url",Selector:`.star-photo img`,ResultType:"attr",Attribute:"src",PostProcessing: []PostProcessing{{Function:"RemoveQueryParams"}}})
967-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"images",Native:func(einterface{}) []string {
968-
html:=e.(*colly.HTMLElement)
969-
varvalues []string
970-
ifmainPhotoURL:=html.ChildAttr(`.star-photo img`,`src`);mainPhotoURL!="" {
971-
partialURLRegex:=regexp.MustCompile(`^(.*)/[^/]+.jpg`)
972-
ifpartialURLMatch:=partialURLRegex.FindStringSubmatch(mainPhotoURL);len(partialURLMatch)==2 {
973-
fullURLRegex:=regexp.MustCompile(regexp.QuoteMeta(partialURLMatch[1])+`/[^"]+.jpg`)
974-
nuxtData:=html.ChildText(`#__NUXT_DATA__`)
975-
ifimageURLs:=fullURLRegex.FindAllString(nuxtData,-1);imageURLs!=nil {
976-
values=imageURLs
977-
}
978-
}
979-
}
980-
returnvalues
981-
}})
982-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"height",Selector:`.about-me-mobile .stars-params-title:contains("Height:") + .stars-params-value`})
983-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"weight",Selector:`.about-me-mobile .stars-params-title:contains("Weight:") + .stars-params-value`})
984-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"band_size",Selector:`.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"1"}}}})
985-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"cup_size",Selector:`.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"2"}}}})
986-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"waist_size",Selector:`.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"3"}}}})
987-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"hip_size",Selector:`.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"4"}}}})
988-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"nationality",Selector:`.about-me-mobile .stars-params-title:contains("Nationality:") + .stars-params-value`,PostProcessing: []PostProcessing{{Function:"Lookup Country"}}})
989-
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"hair_color",Selector:`.about-me-mobile .stars-params-title:contains("Hair Color:") + .stars-params-value`})
964+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"biography",Selector:`.star-bio .show-more-text-container`})
965+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"image_url",Selector:`.avatar img`,ResultType:"attr",Attribute:"src",PostProcessing: []PostProcessing{{Function:"RemoveQueryParams"}}})
966+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"images",Selector:`.avatar img`,ResultType:"attr",Attribute:"src",PostProcessing: []PostProcessing{{Function:"RemoveQueryParams"}}})
967+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"height",Selector:`.star-info-row-title:contains("Height:") + span`})
968+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"weight",Selector:`.star-info-row-title:contains("Weight:") + span`})
969+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"band_size",Selector:`.star-info-row-title:contains("Measurements:") + span`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"1"}}}})
970+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"cup_size",Selector:`.star-info-row-title:contains("Measurements:") + span`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"2"}}}})
971+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"waist_size",Selector:`.star-info-row-title:contains("Measurements:") + span`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"3"}}}})
972+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"hip_size",Selector:`.star-info-row-title:contains("Measurements:") + span`,PostProcessing: []PostProcessing{{Function:"RegexString",Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`,"4"}}}})
973+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"nationality",Selector:`.star-info-row-title:contains("Nationality:") + span`,PostProcessing: []PostProcessing{{Function:"Lookup Country"}}})
974+
siteDetails.SiteRules=append(siteDetails.SiteRules,GenericActorScraperRule{XbvrField:"hair_color",Selector:`.star-info-row-title:contains("Hair Color:") + span`})
990975
scrapeRules.GenericActorScrapingConfig["vrspy scrape"]=siteDetails
991976

992977
siteDetails=GenericScraperRuleSet{}

‎pkg/scrape/vrspy.go

Lines changed: 20 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -55,50 +55,39 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
5555

5656
sc.SceneID=scraperID+"-"+sc.SiteID
5757

58-
sc.Title=e.ChildText(`.video-content .header-container .section-header-container`)
59-
sc.Synopsis=e.ChildText(`.video-description`)
60-
sc.Tags=e.ChildTexts(`.video-categories .v-chip__content`)
61-
62-
e.ForEach(`.video-details-row`,func(idint,e*colly.HTMLElement) {
63-
parts:=strings.SplitN(e.Text,":",2)
64-
key,value:=parts[0],parts[1]
65-
switchstrings.TrimSpace(key) {
66-
case"Stars":
67-
sc.ActorDetails=make(map[string]models.ActorDetails)
68-
e.ForEach(`.stars-list a`,func(idint,e*colly.HTMLElement) {
69-
sc.Cast=append(sc.Cast,e.Text)
70-
sc.ActorDetails[e.Text]= models.ActorDetails{
71-
Source:scraperID+" scrape",
72-
ProfileUrl:e.Request.AbsoluteURL(e.Attr(`href`)),
73-
}
74-
})
75-
case"Duration":
76-
durationParts:=strings.Split(strings.SplitN(strings.TrimSpace(value)," ",2)[0],":")
77-
iflen(durationParts)==3 {
78-
hours,_:=strconv.Atoi(durationParts[0])
79-
minutes,_:=strconv.Atoi(durationParts[1])
80-
sc.Duration=hours*60+minutes
58+
sc.Title=e.ChildText(`.video-content .header-container .video-title .section-header-container`)
59+
sc.Synopsis=e.ChildText(`.video-description-container`)
60+
sc.Tags=e.ChildTexts(`.video-categories .chip`)
61+
62+
sc.ActorDetails=make(map[string]models.ActorDetails)
63+
e.ForEach(`.video-actor-item`,func(idint,e*colly.HTMLElement) {
64+
sc.Cast=append(sc.Cast,e.Text)
65+
e.ForEach(`a`,func(idint,a*colly.HTMLElement) {
66+
sc.ActorDetails[e.Text]= models.ActorDetails{
67+
Source:scraperID+" scrape",
68+
ProfileUrl:e.Request.AbsoluteURL(a.Attr(`href`)),
8169
}
82-
case"Release date":
83-
tmpDate,_:=goment.New(strings.TrimSpace(value),"DD MMM YYYY")
84-
sc.Released=tmpDate.Format("YYYY-MM-DD")
85-
}
70+
71+
})
8672
})
8773

8874
vardurationParts []string
8975
// Date & Duration
90-
e.ForEach(`div.single-video-info__list-item`,func(idint,e*colly.HTMLElement) {
76+
e.ForEach(`.video-details-info-item`,func(idint,e*colly.HTMLElement) {
9177
parts:=strings.Split(e.Text,":")
9278
iflen(parts)>1 {
9379
switchstrings.TrimSpace(parts[0]) {
9480
case"Release date":
95-
tmpDate,_:=goment.New(strings.TrimSpace(parts[1]),"MMM D, YYYY")
81+
tmpDate,_:=goment.New(strings.TrimSpace(parts[1]),"DD MMMM YYYY")
9682
sc.Released=tmpDate.Format("YYYY-MM-DD")
9783
case"Duration":
9884
durationParts=strings.Split(strings.TrimSpace(parts[1])," ")
9985
tmpDuration,err:=strconv.Atoi(durationParts[0])
86+
mins:=tmpDuration*60
87+
tmpDuration,err=strconv.Atoi(parts[2])
88+
mins=mins+tmpDuration
10089
iferr==nil {
101-
sc.Duration=tmpDuration
90+
sc.Duration=mins
10291
}
10392
}
10493
}
@@ -114,7 +103,7 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
114103
}
115104

116105
nuxtData:=e.ChildText(`#__NUXT_DATA__`)
117-
imageRegex:=regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String())+`(/photos/[^?"]*\.jpg)\?width`)
106+
imageRegex:=regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String())+`(/photos/[^?"]*\.jpg)`)
118107
sc.Gallery=imageRegex.FindAllString(nuxtData,-1)
119108

120109
// trailer details

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp