|
16 | 16 | ) |
17 | 17 |
|
18 | 18 | importasyncio |
| 19 | +importsignal |
| 20 | +importsys |
19 | 21 | fromgeneral_processimportmain_process |
20 | 22 | fromasync_loggerimportwis_logger |
21 | 23 | fromasync_databaseimportAsyncDatabaseManager |
22 | 24 | fromcustom_processesimportcrawler_config_map |
23 | 25 |
|
# Number of completed scheduler iterations; compared against each focus
# point's `freq` to decide whether it runs in a given loop.
loop_counter = 0
# Set from the signal handler to request a graceful shutdown; the main loop
# polls/waits on this event instead of sleeping unconditionally.
shutdown_event = asyncio.Event()
| 28 | + |
def signal_handler(sig, frame):
    """Handle SIGINT (Ctrl+C) / SIGTERM: request a graceful shutdown.

    Only flags the module-level ``shutdown_event``; the main loop observes
    the flag and performs the actual cleanup. Runs in the main thread (the
    standard ``signal.signal`` dispatch), so setting the asyncio.Event here
    is safe.
    """
    wis_logger.debug(f"Received signal {sig}, initiating graceful shutdown...")
    shutdown_event.set()
| 33 | + |
async def cleanup_resources(crawlers, db_manager):
    """Release all long-lived resources: the web crawler (browser), then the DB.

    Best-effort shutdown helper: any failure is logged as a warning rather
    than propagated, because this runs on the teardown path where raising
    would mask the original cause of shutdown.
    """
    wis_logger.debug("Starting resource cleanup...")
    try:
        # Close the browser-backed web crawler first, if it was initialized.
        web_crawler = crawlers.get("web")
        if web_crawler is not None:
            wis_logger.debug("Closing web crawler...")
            await web_crawler.close()

        # Then release database resources.
        wis_logger.debug("Cleaning up database...")
        await db_manager.cleanup()
        wis_logger.debug("Resource cleanup completed successfully")
    except Exception as e:
        wis_logger.warning(f"Error during resource cleanup: {e}")
25 | 51 |
|
async def schedule_task():
    """Main scheduler entry point.

    Initializes the database and the per-platform crawlers, then runs the
    periodic task loop until ``shutdown_event`` is set (signal or Ctrl+C).
    All resources are released via ``cleanup_resources`` on exit, whatever
    the exit path.

    Raises:
        Exception: only from ``AsyncDatabaseManager`` initialization, which
            happens before the outer try/finally — a DB that cannot start is
            fatal by design.
    """
    # Install signal handlers (POSIX only; on Windows we rely on
    # KeyboardInterrupt propagating out of asyncio.run()).
    if sys.platform != 'win32':
        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

    # initialize if any error, will raise exception
    db_manager = AsyncDatabaseManager()
    await db_manager.initialize()
    crawlers = {}

    try:
        # Each crawler initializes independently; a failure disables that
        # platform's sources but does not abort the scheduler.
        for platform in ALL_PLATFORMS:
            if platform == KUAISHOU_PLATFORM_NAME:
                try:
                    ks_crawler = KuaiShouCrawler(db_manager=db_manager)
                    await ks_crawler.async_initialize()
                    crawlers[KUAISHOU_PLATFORM_NAME] = ks_crawler
                except Exception as e:
                    wis_logger.warning(f"initialize kuaishou crawler failed: {e}, will abort all the sources for kuaishou platform")
            elif platform == WEIBO_PLATFORM_NAME:
                try:
                    wb_crawler = WeiboCrawler(db_manager=db_manager)
                    await wb_crawler.async_initialize()
                    crawlers[WEIBO_PLATFORM_NAME] = wb_crawler
                except Exception as e:
                    wis_logger.warning(f"initialize weibo crawler failed: {e}, will abort all the sources for weibo platform")
            elif platform == 'web':
                try:
                    web_crawler = AsyncWebCrawler(crawler_config_map=crawler_config_map, db_manager=db_manager)
                    await web_crawler.start()
                    crawlers[platform] = web_crawler
                except Exception as e:
                    wis_logger.warning(f"initialize web crawler failed: {e}, will abort all the sources for web platform and search engines")
            else:
                raise ValueError(f"platform {platform} not supported")

        global loop_counter
        wis_logger.info("All crawlers initialized successfully, starting main loop...")

        while not shutdown_event.is_set():
            try:
                wis_logger.info(f'task execute loop {loop_counter + 1}')
                tasks = await db_manager.get_activated_focus_points_with_sources()
                jobs = []
                for task in tasks:
                    focus = task['focus_point']
                    sources = task['sources']
                    if not focus:
                        continue
                    # Skip focus points with no frequency or no focus text;
                    # the truthiness test also guards the modulo below
                    # against freq == 0.
                    if not focus['freq'] or not focus['focuspoint']:
                        continue
                    # Only run a focus point on loops matching its frequency.
                    if loop_counter % focus['freq'] != 0:
                        continue
                    jobs.append(main_process(focus, sources, crawlers, db_manager))
                loop_counter += 1

                if jobs:
                    await asyncio.gather(*jobs)

                wis_logger.info('task execute loop finished, work after 3600 seconds')

                # Interruptible sleep: wakes immediately if shutdown is
                # requested, otherwise times out and starts the next loop.
                try:
                    await asyncio.wait_for(shutdown_event.wait(), timeout=3600)
                    break  # shutdown_event was set -> leave the loop
                except asyncio.TimeoutError:
                    continue  # normal timeout -> next iteration

            except asyncio.CancelledError:
                wis_logger.debug("Task cancelled, shutting down...")
                break
            except KeyboardInterrupt:
                wis_logger.debug("Received keyboard interrupt, shutting down...")
                break
            except Exception as e:
                wis_logger.warning(f"Unexpected error in main loop: {e}")
                # Stay in the loop, but back off briefly so a persistent
                # failure (e.g. a DB outage) cannot hot-spin and flood the
                # log; the wait is still interruptible by shutdown_event.
                try:
                    await asyncio.wait_for(shutdown_event.wait(), timeout=60)
                    break
                except asyncio.TimeoutError:
                    continue

    except Exception as e:
        wis_logger.warning(f"Critical error during initialization: {e}")
    finally:
        await cleanup_resources(crawlers, db_manager)
55 | 135 |
|
56 | | -globalloop_counter |
if __name__ == "__main__":
    # Script entry point: run the scheduler until it finishes or is
    # interrupted. asyncio.run() owns the event loop lifecycle.
    try:
        asyncio.run(schedule_task())
    except KeyboardInterrupt:
        # Ctrl+C on platforms without the SIGINT handler (e.g. Windows),
        # or raised before the handler was installed.
        wis_logger.debug("Program interrupted by user")
    except Exception as e:
        # schedule_task() handles its own errors; this catches anything
        # that escapes (e.g. DB initialization failure).
        wis_logger.warning(f"Program failed with error: {e}")
    finally:
        wis_logger.debug("Program shutdown complete")