Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 38a33e7

Browse files
authored
Merge branch 'scrapy:master' into ipv6_proxy_support
2 parents fb08154 + b49aa2f, commit 38a33e7

File tree

8 files changed

+27
-11
lines changed

8 files changed

+27
-11
lines changed

‎docs/topics/addons.rst‎

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ recommend that such custom components should be written in the following way:
8888

8989
1. The custom component (e.g. ``MyDownloadHandler``) shouldn't inherit from the
9090
default Scrapy one (e.g.
91-
``scrapy.core.downloader.handlers.http.HTTPDownloadHandler``), but instead
91+
``scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler``), but instead
9292
be able to load the class of the fallback component from a special setting
9393
(e.g. ``MY_FALLBACK_DOWNLOAD_HANDLER``), create an instance of it and use
9494
it.
@@ -166,7 +166,6 @@ Use a fallback component:
166166

167167
.. code-block:: python
168168
169-
from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
170169
from scrapy.utils.misc import build_from_crawler
171170
172171

‎docs/topics/settings.rst‎

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ connections (for ``HTTP10DownloadHandler``).
711711
so you can safely ignore this setting,
712712
unless you really want to use HTTP/1.0 and override
713713
:setting:`DOWNLOAD_HANDLERS` for ``http(s)`` scheme accordingly,
714-
i.e. to ``'scrapy.core.downloader.handlers.http.HTTP10DownloadHandler'``.
714+
i.e. to ``'scrapy.core.downloader.handlers.http10.HTTP10DownloadHandler'``.
715715

716716
.. setting:: DOWNLOADER_CLIENTCONTEXTFACTORY
717717

@@ -909,8 +909,8 @@ Default:
909909
{
910910
"data":"scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler",
911911
"file":"scrapy.core.downloader.handlers.file.FileDownloadHandler",
912-
"http":"scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
913-
"https":"scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
912+
"http":"scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler",
913+
"https":"scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler",
914914
"s3":"scrapy.core.downloader.handlers.s3.S3DownloadHandler",
915915
"ftp":"scrapy.core.downloader.handlers.ftp.FTPDownloadHandler",
916916
}

‎scrapy/core/downloader/handlers/http.py‎

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,18 @@
1+
importwarnings
2+
13
fromscrapy.core.downloader.handlers.http10importHTTP10DownloadHandler
24
fromscrapy.core.downloader.handlers.http11import (
35
HTTP11DownloadHandlerasHTTPDownloadHandler,
46
)
7+
fromscrapy.exceptionsimportScrapyDeprecationWarning
8+
9+
warnings.warn(
10+
"The scrapy.core.downloader.handlers.http module is deprecated,"
11+
" please import scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler"
12+
" instead of its deprecated alias scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
13+
ScrapyDeprecationWarning,
14+
stacklevel=2,
15+
)
516

617
__all__= [
718
"HTTP10DownloadHandler",

‎scrapy/core/downloader/handlers/s3.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
fromtypingimportTYPE_CHECKING,Any
44

5-
fromscrapy.core.downloader.handlers.httpimportHTTPDownloadHandler
5+
fromscrapy.core.downloader.handlers.http11importHTTP11DownloadHandler
66
fromscrapy.exceptionsimportNotConfigured
77
fromscrapy.utils.botoimportis_botocore_available
88
fromscrapy.utils.httpobjimporturlparse_cached
@@ -29,7 +29,7 @@ def __init__(
2929
aws_access_key_id:str|None=None,
3030
aws_secret_access_key:str|None=None,
3131
aws_session_token:str|None=None,
32-
httpdownloadhandler:type[HTTPDownloadHandler]=HTTPDownloadHandler,
32+
httpdownloadhandler:type[HTTP11DownloadHandler]=HTTP11DownloadHandler,
3333
**kw:Any,
3434
):
3535
ifnotis_botocore_available():

‎scrapy/robotstxt.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def decode_robotstxt(
2828
ifto_native_str_type:
2929
body_decoded=to_unicode(robotstxt_body)
3030
else:
31-
body_decoded=robotstxt_body.decode("utf-8",errors="ignore")
31+
body_decoded=robotstxt_body.decode("utf-8-sig",errors="ignore")
3232
exceptUnicodeDecodeError:
3333
# If we found garbage or robots.txt in an encoding other than UTF-8, disregard it.
3434
# Switch to 'allow all' state.

‎scrapy/settings/default_settings.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,8 +251,8 @@
251251
DOWNLOAD_HANDLERS_BASE= {
252252
"data":"scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler",
253253
"file":"scrapy.core.downloader.handlers.file.FileDownloadHandler",
254-
"http":"scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
255-
"https":"scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
254+
"http":"scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler",
255+
"https":"scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler",
256256
"s3":"scrapy.core.downloader.handlers.s3.S3DownloadHandler",
257257
"ftp":"scrapy.core.downloader.handlers.ftp.FTPDownloadHandler",
258258
}

‎tests/test_addons.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def update_settings(self, settings):
149149
)
150150
assert (
151151
crawler.settings.get(FALLBACK_SETTING)
152-
=="scrapy.core.downloader.handlers.http.HTTPDownloadHandler"
152+
=="scrapy.core.downloader.handlers.http11.HTTP11DownloadHandler"
153153
)
154154

155155
settings_dict= {

‎tests/test_robotstxt_interface.py‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ def test_decode_non_utf8(self):
129129
decoded_content=decode_robotstxt(robotstxt_body,spider=None)
130130
assertdecoded_content=="User-agent: *\nDisallow: /\n"
131131

132+
# UTF-8 BOM at the beginning of the file ignored
133+
deftest_decode_utf8_bom(self):
134+
robotstxt_body=b"\xef\xbb\xbfUser-agent: *\nDisallow: /\n"
135+
decoded_content=decode_robotstxt(robotstxt_body,spider=None)
136+
assertdecoded_content=="User-agent: *\nDisallow: /\n"
137+
132138

133139
classTestPythonRobotParser(BaseRobotParserTest):
134140
defsetup_method(self):

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp