Commit f10bdc9

scraper: add SwallowBay native scraper (#1715)

Co-authored-by: crwxaj <crwxaj>

1 parent: 46cca32

File tree

2 files changed: +144, −6 lines changed

pkg/config/scrapers.json

Lines changed: 0 additions & 6 deletions
@@ -367,12 +367,6 @@
       "company": "SuckMeVR",
       "avatar_url": "https://cdn-vr.sexlikereal.com/images/studio_creatives/logotypes/1/403/logo_crop_1657112082.png"
     },
-    {
-      "url": "https://www.sexlikereal.com/studios/swallowbay",
-      "name": "SwallowBay",
-      "company": "SwallowBay",
-      "avatar_url": "https://mcdn.vrporn.com/files/20210330092926/swallowbay-400x400.jpg"
-    },
     {
       "url": "https://www.sexlikereal.com/studios/sweetlonglips",
       "name": "Sweetlonglips",

pkg/scrape/swallowbay.go

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
package scrape

import (
	"encoding/json"
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"github.com/gocolly/colly/v2"
	"github.com/nleeper/goment"
	"github.com/thoas/go-funk"
	"github.com/xbapps/xbvr/pkg/models"
)

func SwallowBay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
	defer wg.Done()
	scraperID := "swallowbay"
	siteID := "SwallowBay"
	logScrapeStart(scraperID, siteID)

	sceneCollector := createCollector("swallowbay.com")
	siteCollector := createCollector("swallowbay.com")

	sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
		sc := models.ScrapedScene{}
		sc.ScraperID = scraperID
		sc.SceneType = "VR"
		sc.Studio = "SwallowBay"
		sc.Site = siteID
		sc.SiteID = ""
		sc.HomepageURL = e.Request.URL.String()

		// Scene ID - numeric suffix of the scene URL (e.g. "...-123.html")
		regexpSceneID := regexp.MustCompile(`\-(\d+)\.html$`)
		sc.SiteID = regexpSceneID.FindStringSubmatch(e.Request.URL.Path)[1]

		// Title
		e.ForEach(`div.content-title h1`, func(id int, e *colly.HTMLElement) {
			sc.Title = strings.TrimSpace(e.Text)
		})

		// Cover URLs
		e.ForEach(`dl8-video`, func(id int, e *colly.HTMLElement) {
			coverUrl := e.Attr("poster")
			sc.Covers = append(sc.Covers, coverUrl)
		})

		// Cast
		sc.ActorDetails = make(map[string]models.ActorDetails)
		e.ForEach(`div.content-models a`, func(id int, e *colly.HTMLElement) {
			if strings.TrimSpace(e.Text) != "" {
				sc.Cast = append(sc.Cast, strings.TrimSpace(e.Attr("title")))
				sc.ActorDetails[strings.TrimSpace(e.Text)] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Attr("href")}
			}
		})

		// Tags
		ignoreTags := []string{"vr 180", "vr 6k", "8k", "iphone", "ultra high definition"}
		e.ForEach(`div.content-tags a`, func(id int, e *colly.HTMLElement) {
			tag := strings.ToLower(strings.TrimSpace(e.Text))
			if tag != "" {
				for _, v := range ignoreTags {
					if tag == v {
						return
					}
				}
				sc.Tags = append(sc.Tags, tag)
			}
		})

		// Synopsis
		e.ForEach(`div.content-desc.active`, func(id int, e *colly.HTMLElement) {
			sc.Synopsis = strings.TrimSpace(e.Text)
		})

		// Release date
		e.ForEach(`div.content-data div.content-date`, func(id int, e *colly.HTMLElement) {
			date := strings.TrimSuffix(e.Text, "Date: ")
			tmpDate, _ := goment.New(strings.TrimSpace(date), "Do MMM, YYYY:")
			sc.Released = tmpDate.Format("YYYY-MM-DD")
		})

		// Duration
		e.ForEach(`div.content-data div.content-time`, func(id int, e *colly.HTMLElement) {
			parts := strings.Split(e.Text, ":")
			if len(parts) > 1 {
				tmpDuration, err := strconv.Atoi(strings.TrimSpace(parts[1]))
				if err == nil {
					sc.Duration = tmpDuration
				}
			}
		})

		// No filename information yet

		// Trailer sources from the dl8-video player, stored as JSON
		sc.TrailerType = "urls"
		var trailers []models.VideoSource
		e.ForEach(`dl8-video source`, func(id int, e *colly.HTMLElement) {
			trailers = append(trailers, models.VideoSource{URL: e.Attr("src"), Quality: strings.TrimSpace(e.Attr("quality"))})
		})
		trailerJson, _ := json.Marshal(models.VideoSourceResponse{VideoSources: trailers})
		sc.TrailerSrc = string(trailerJson)

		if sc.SiteID != "" {
			sc.SceneID = fmt.Sprintf("swallowbay-%v", sc.SiteID)

			// save only if we got a SceneID
			out <- sc
		}
	})

	siteCollector.OnHTML(`div.pagination a`, func(e *colly.HTMLElement) {
		if !limitScraping {
			pageURL := e.Request.AbsoluteURL(e.Attr("href"))
			siteCollector.Visit(pageURL)
		}
	})

	siteCollector.OnHTML(`div.-video div.item-name a`, func(e *colly.HTMLElement) {
		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))

		// If the scene exists in the database, there's no need to scrape it again
		if !funk.ContainsString(knownScenes, sceneURL) {
			sceneCollector.Visit(sceneURL)
		}
	})

	if singleSceneURL != "" {
		sceneCollector.Visit(singleSceneURL)
	} else {
		siteCollector.Visit("https://swallowbay.com/most-recent/")
	}

	if updateSite {
		updateSiteLastUpdate(scraperID)
	}
	logScrapeFinished(scraperID, siteID)
	return nil
}

func init() {
	registerScraper("swallowbay", "SwallowBay", "https://swallowbay.com/templates/swallowbay/images/favicons/apple-icon-180x180.png", "swallowbay.com", SwallowBay)
}
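
For reference, a minimal harness sketch (hypothetical, not part of the commit): in xbvr the function is normally invoked through the registerScraper hook above, but its signature allows driving it directly. This assumes the code lives in package scrape so the unexported helpers (createCollector, the log functions) resolve; the scene URL is a made-up placeholder matching the `\-(\d+)\.html$` pattern the scraper expects.

func exampleRunSwallowBay() {
	var wg sync.WaitGroup
	out := make(chan models.ScrapedScene)

	// SwallowBay calls wg.Done() itself via defer, so add before launching.
	wg.Add(1)
	go SwallowBay(&wg, false, nil, out, "https://swallowbay.com/example-scene-123.html", "", true)

	// Close the channel once the scraper returns so the range loop ends.
	go func() {
		wg.Wait()
		close(out)
	}()

	for scene := range out {
		fmt.Println(scene.SceneID, scene.Title)
	}
}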

0 commit comments
