Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5b44ef9

Browse files
Sumesh Murali, Ashwin Rajeev
Sumesh Murali
authored and
Ashwin Rajeev
committed
fixed flake8 issues
1 parent feaabb9, commit 5b44ef9

File tree

4 files changed

+30
-10
lines changed

4 files changed

+30
-10
lines changed

‎selectorlib/formatter.py‎

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@ class Formatter:
44
"""
55

66
defformat(self,text:str):
7-
"""Override this function in inherited subclass. return text after formatting"""
7+
"""Override this function in inherited subclass.
8+
return text after formatting"""
89
returntext
910

1011
@property

‎selectorlib/selectorlib.py‎

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88

99
defextract_field(element,item_type,attribute=None,formatter=None):
1010
ifitem_type=='Text':
11-
texts= [i.strip()foriinelement.xpath('.//text()').getall()ifi.strip()]
11+
texts= [
12+
i.strip()foriinelement.xpath('.//text()').getall()ifi.strip()
13+
]
1214
content=" ".join(texts)
1315
elifitem_type=='Link':
1416
content=element.xpath('.//@href').get()
@@ -68,7 +70,8 @@ def extract(self, html: str, base_url: str = None):
6870
"""
6971
Args:
7072
html: html string
71-
base_url (str, optional): specifying the base_url will make all extracted Links absolute
73+
base_url (str, optional): specifying the base_url will make all
74+
extracted Links absolute
7275
Returns:
7376
dict: extracted data from given html string
7477
@@ -80,7 +83,9 @@ def extract(self, html: str, base_url: str = None):
8083
sel.root.make_links_absolute()
8184
fields_data= {}
8285
forselector_name,selector_configinself.config.items():
83-
fields_data[selector_name]=self._extract_selector(selector_config,sel)
86+
fields_data[selector_name]=self._extract_selector(
87+
selector_config,sel
88+
)
8489
returnfields_data
8590

8691
def_extract_selector(self,field_config,parent_parser):
@@ -105,7 +110,9 @@ def _extract_selector(self, field_config, parent_parser):
105110
if'attribute'infield_config:
106111
kwargs['attribute']=field_config['attribute']
107112
if'format'infield_config:
108-
kwargs['formatter']=self.formatters[field_config['format']]
113+
kwargs['formatter']=self.formatters[
114+
field_config['format']
115+
]
109116
value=extract_field(element,item_type,**kwargs)
110117

111118
iffield_config.get('multiple')isnotTrue:
@@ -119,7 +126,10 @@ def _get_child_item(self, field_config, element):
119126
children_config=field_config['children']
120127
child_item= {}
121128
forfieldinchildren_config:
122-
child_value=self._extract_selector(children_config[field],element)
129+
child_value=self._extract_selector(
130+
children_config[field],
131+
element
132+
)
123133
child_item[field]=child_value
124134
returnchild_item
125135

‎setup.py‎

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,8 @@
3434
'Programming Language :: Python :: 3.6',
3535
'Programming Language :: Python :: 3.7',
3636
],
37-
description="A library to read a YML file with Xpath or CSS Selectors and extract data from HTML pages using them",
37+
description="A library to read a YML file with Xpath or CSS Selectors"
38+
" and extract data from HTML pages using them",
3839
entry_points={
3940
'console_scripts': [
4041
'selectorlib=selectorlib.cli:main',

‎tests/test_selectorlib.py‎

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,10 @@ def extract_field_test_html():
6565
deftest_content(html,input_yaml,output_yaml):
6666
base_url="https://scrapeme.live/shop/Bulbasaur/"
6767
formatters=formatter.Formatter.get_all()
68-
extractor=selectorlib.Extractor.from_yaml_string(input_yaml,formatters=formatters)
68+
extractor=selectorlib.Extractor.from_yaml_string(
69+
input_yaml,
70+
formatters=formatters
71+
)
6972
output=extractor.extract(html,base_url=base_url)
7073
assertoutput==yaml.safe_load(output_yaml)
7174

@@ -90,8 +93,13 @@ def test_empty_selector_in_children(
9093
"/B004K4CIKC/ref=sr_1_3?qid=1563864262&refinements=p_89:NIKE" \
9194
"&s=apparel&sr=1-3"
9295
formatters=formatter.Formatter.get_all()
93-
extractor=selectorlib.Extractor.from_yaml_string(empty_selector_yaml,formatters=formatters)
94-
output=extractor.extract(amazon_nike_product_page_html,base_url=base_url)
96+
extractor=selectorlib.Extractor.from_yaml_string(
97+
empty_selector_yaml,formatters=formatters
98+
)
99+
output=extractor.extract(
100+
amazon_nike_product_page_html,
101+
base_url=base_url
102+
)
95103
assertoutput==json.loads(amazon_nike_product_page_output)
96104

97105

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp