Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b0512d9

Browse files
authored
Revert "Revert "feat: Optimize concurrent scraping (#1828)" (#1833)" (#1855)
This reverts commit 943283e. Co-authored-by: crwxaj <crwxaj>
1 parent 81f070e, commit b0512d9

39 files changed

+107
-119
lines changed

‎pkg/models/model_scraper.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ package models
22

33
import (
	"encoding/json"
	"sync"
	"sync/atomic"
)
77

88
varscrapers []Scraper
99

10-
typeScraperFuncfunc(*sync.WaitGroup,bool, []string,chan<-ScrapedScene,string,string,bool)error
10+
typeScraperFuncfunc(*ScrapeWG,bool, []string,chan<-ScrapedScene,string,string,bool)error
1111

1212
typeScraperstruct {
1313
IDstring`json:"id"`
@@ -90,3 +90,29 @@ func RegisterScraper(id string, name string, avatarURL string, domain string, f
9090
s.MasterSiteId=masterSiteId
9191
scrapers=append(scrapers,s)
9292
}
93+
94+
// ScrapeWG is a wait-group-like counter for running scrapers. Unlike
// sync.WaitGroup it exposes the current count and lets Wait return as soon as
// the count drops below a caller-chosen threshold, which allows a caller to
// keep scrapers running at the maximum count at all times.
//
// The zero value is ready to use. A ScrapeWG must not be copied after first
// use because it contains a mutex.
type ScrapeWG struct {
	// count is the number of outstanding scrapers. It stays the first field so
	// that it is 64-bit aligned for atomic access on 32-bit platforms.
	count int64

	// mu orders counter updates against the checks made by waiters, so a
	// waiter can never miss the wake-up for an update it has not yet seen.
	mu sync.Mutex

	// cond is created lazily by the first waiter; it is guarded by mu.
	cond *sync.Cond
}

// add applies delta to the counter, wakes any goroutines blocked in Wait so
// they can re-evaluate their threshold, and returns the updated count.
func (wg *ScrapeWG) add(delta int64) int64 {
	wg.mu.Lock()
	defer wg.mu.Unlock()

	updated := atomic.AddInt64(&wg.count, delta)
	if wg.cond != nil {
		wg.cond.Broadcast()
	}
	return updated
}

// Add adds n, which may be negative, to the counter.
func (wg *ScrapeWG) Add(n int64) {
	wg.add(n)
}

// Done decrements the counter by one. It panics if the counter becomes
// negative.
func (wg *ScrapeWG) Done() {
	// Test the value produced by this decrement rather than re-loading the
	// counter, so a concurrent Add or Done cannot hide or misattribute the
	// underflow.
	if wg.add(-1) < 0 {
		panic("negative wait group counter")
	}
}

// Wait blocks while the counter is both non-zero and at least n; it returns
// once the counter is below n or is zero. The goroutine sleeps on a condition
// variable instead of spinning, so waiting does not consume CPU.
func (wg *ScrapeWG) Wait(n int64) {
	wg.mu.Lock()
	defer wg.mu.Unlock()

	if wg.cond == nil {
		wg.cond = sync.NewCond(&wg.mu)
	}
	for {
		// Take one snapshot per iteration so both tests see the same value.
		current := atomic.LoadInt64(&wg.count)
		if current < n || current == 0 {
			return
		}
		wg.cond.Wait()
	}
}

// Count returns the current value of the counter.
func (wg *ScrapeWG) Count() int64 {
	return atomic.LoadInt64(&wg.count)
}

‎pkg/scrape/baberoticavr.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"regexp"
88
"strconv"
99
"strings"
10-
"sync"
1110

1211
"github.com/go-resty/resty/v2"
1312
"github.com/gocolly/colly/v2"
@@ -16,7 +15,7 @@ import (
1615
"github.com/xbapps/xbvr/pkg/models"
1716
)
1817

19-
funcBaberoticaVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
18+
funcBaberoticaVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
2019
deferwg.Done()
2120
scraperID:="baberoticavr"
2221
siteID:="BaberoticaVR"

‎pkg/scrape/badoink.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"regexp"
99
"strconv"
1010
"strings"
11-
"sync"
1211
"time"
1312

1413
"github.com/go-resty/resty/v2"
@@ -23,7 +22,7 @@ import (
2322
"github.com/xbapps/xbvr/pkg/models"
2423
)
2524

26-
funcBadoinkSite(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,scraperIDstring,siteIDstring,URLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
25+
funcBadoinkSite(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,scraperIDstring,siteIDstring,URLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
2726
deferwg.Done()
2827
logScrapeStart(scraperID,siteID)
2928

@@ -268,23 +267,23 @@ func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out
268267
returnnil
269268
}
270269

271-
funcBadoinkVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
270+
funcBadoinkVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
272271
returnBadoinkSite(wg,updateSite,knownScenes,out,singleSceneURL,"badoinkvr","BadoinkVR","https://badoinkvr.com/vrpornvideos?order=newest",singeScrapeAdditionalInfo,limitScraping)
273272
}
274273

275-
funcB18VR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
274+
funcB18VR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
276275
returnBadoinkSite(wg,updateSite,knownScenes,out,singleSceneURL,"18vr","18VR","https://18vr.com/vrpornvideos?order=newest",singeScrapeAdditionalInfo,limitScraping)
277276
}
278277

279-
funcVRCosplayX(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
278+
funcVRCosplayX(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
280279
returnBadoinkSite(wg,updateSite,knownScenes,out,singleSceneURL,"vrcosplayx","VRCosplayX","https://vrcosplayx.com/cosplaypornvideos?order=newest",singeScrapeAdditionalInfo,limitScraping)
281280
}
282281

283-
funcBabeVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
282+
funcBabeVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
284283
returnBadoinkSite(wg,updateSite,knownScenes,out,singleSceneURL,"babevr","BabeVR","https://babevr.com/vrpornvideos?order=newest",singeScrapeAdditionalInfo,limitScraping)
285284
}
286285

287-
funcKinkVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
286+
funcKinkVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
288287
returnBadoinkSite(wg,updateSite,knownScenes,out,singleSceneURL,"kinkvr","KinkVR","https://kinkvr.com/bdsm-vr-videos?order=newest",singeScrapeAdditionalInfo,limitScraping)
289288
}
290289

‎pkg/scrape/caribbeancom.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"encoding/json"
55
"strconv"
66
"strings"
7-
"sync"
87

98
"github.com/bregydoc/gtranslate"
109
"github.com/gocolly/colly/v2"
@@ -15,7 +14,7 @@ import (
1514
"golang.org/x/text/language"
1615
)
1716

18-
funcCariVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
17+
funcCariVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
1918
deferwg.Done()
2019
scraperID:="caribbeancomvr"
2120
siteID:="CaribbeanCom VR"

‎pkg/scrape/czechvr.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"regexp"
55
"strconv"
66
"strings"
7-
"sync"
87

98
"github.com/gocolly/colly/v2"
109
"github.com/mozillazg/go-slugify"
@@ -14,7 +13,7 @@ import (
1413
"github.com/xbapps/xbvr/pkg/models"
1514
)
1615

17-
funcCzechVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,scraperIDstring,siteIDstring,nwIDstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
16+
funcCzechVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,scraperIDstring,siteIDstring,nwIDstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
1817
deferwg.Done()
1918
logScrapeStart(scraperID,siteID)
2019
commonDb,_:=models.GetCommonDB()
@@ -199,14 +198,14 @@ func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan
199198
}
200199

201200
funcaddCZVRScraper(idstring,namestring,nwidstring,avatarURLstring) {
202-
registerScraper(id,name,avatarURL,"czechvrnetwork.com",func(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
201+
registerScraper(id,name,avatarURL,"czechvrnetwork.com",func(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
203202
returnCzechVR(wg,updateSite,knownScenes,out,singleSceneURL,id,name,nwid,singeScrapeAdditionalInfo,limitScraping)
204203
})
205204
}
206205

207206
funcinit() {
208207
// scraper for scraping single scenes where only the url is provided
209-
registerScraper("czechvr-single_scene","Czech VR - Other Studios","","czechvrnetwork.com",func(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
208+
registerScraper("czechvr-single_scene","Czech VR - Other Studios","","czechvrnetwork.com",func(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
210209
returnCzechVR(wg,updateSite,knownScenes,out,singleSceneURL,"","","","",limitScraping)
211210
})
212211
addCZVRScraper("czechvr","Czech VR","15","https://www.czechvr.com/images/favicon/android-chrome-256x256.png")

‎pkg/scrape/darkroomvr.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@ import (
55
"fmt"
66
"strconv"
77
"strings"
8-
"sync"
98

109
"github.com/gocolly/colly/v2"
1110
"github.com/nleeper/goment"
1211
"github.com/thoas/go-funk"
1312
"github.com/xbapps/xbvr/pkg/models"
1413
)
1514

16-
funcDarkRoomVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
15+
funcDarkRoomVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
1716
deferwg.Done()
1817
scraperID:="darkroomvr"
1918
siteID:="DarkRoomVR"

‎pkg/scrape/fuckpassvr.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"net/url"
66
"regexp"
77
"strings"
8-
"sync"
98

109
"github.com/go-resty/resty/v2"
1110
"github.com/gocolly/colly/v2"
@@ -14,7 +13,7 @@ import (
1413
"github.com/xbapps/xbvr/pkg/models"
1514
)
1615

17-
funcFuckPassVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
16+
funcFuckPassVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
1817
deferwg.Done()
1918
scraperID:="fuckpassvr-native"
2019
siteID:="FuckPassVR"

‎pkg/scrape/groobyvr.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"regexp"
66
"strconv"
77
"strings"
8-
"sync"
98

109
"github.com/gocolly/colly/v2"
1110
"github.com/mozillazg/go-slugify"
@@ -14,7 +13,7 @@ import (
1413
"github.com/xbapps/xbvr/pkg/models"
1514
)
1615

17-
funcGroobyVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
16+
funcGroobyVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
1817
deferwg.Done()
1918
scraperID:="groobyvr"
2019
siteID:="GroobyVR"

‎pkg/scrape/hologirlsvr.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,14 @@ package scrape
33
import (
44
"regexp"
55
"strings"
6-
"sync"
76

87
"github.com/gocolly/colly/v2"
98
"github.com/mozillazg/go-slugify"
109
"github.com/thoas/go-funk"
1110
"github.com/xbapps/xbvr/pkg/models"
1211
)
1312

14-
funcHoloGirlsVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
13+
funcHoloGirlsVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
1514
deferwg.Done()
1615
scraperID:="hologirlsvr"
1716
siteID:="HoloGirlsVR"

‎pkg/scrape/lethalhardcorevr.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package scrape
33
import (
44
"regexp"
55
"strings"
6-
"sync"
76

87
"github.com/gocolly/colly/v2"
98
"github.com/mozillazg/go-slugify"
@@ -26,7 +25,7 @@ func isGoodTag(lookup string) bool {
2625
returntrue
2726
}
2827

29-
funcLethalHardcoreSite(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,scraperIDstring,siteIDstring,URLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
28+
funcLethalHardcoreSite(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,scraperIDstring,siteIDstring,URLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
3029
deferwg.Done()
3130
logScrapeStart(scraperID,siteID)
3231

@@ -176,11 +175,11 @@ func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []strin
176175
returnnil
177176
}
178177

179-
funcLethalHardcoreVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
178+
funcLethalHardcoreVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
180179
returnLethalHardcoreSite(wg,updateSite,knownScenes,out,singleSceneURL,"lethalhardcorevr","LethalHardcoreVR","https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95595&sort=released",singeScrapeAdditionalInfo,limitScraping)
181180
}
182181

183-
funcWhorecraftVR(wg*sync.WaitGroup,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
182+
funcWhorecraftVR(wg*models.ScrapeWG,updateSitebool,knownScenes []string,outchan<- models.ScrapedScene,singleSceneURLstring,singeScrapeAdditionalInfostring,limitScrapingbool)error {
184183
returnLethalHardcoreSite(wg,updateSite,knownScenes,out,singleSceneURL,"whorecraftvr","WhorecraftVR","https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95347&sort=released",singeScrapeAdditionalInfo,limitScraping)
185184
}
186185

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp