Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 999de3e

Browse files
committed
update docs and rename formatter class
1 parent a5baff7 commit 999de3e

File tree

7 files changed

+95
-73
lines changed

7 files changed

+95
-73
lines changed

‎docs/index.rst‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Welcome to selectorlib's documentation!
99

1010

1111
installation
12+
usage
1213
selectorlib
1314
contributing
1415
authors

‎docs/selectorlib.rst‎

Lines changed: 6 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,15 @@
11
selectorlib package
22
===================
33

4-
Module contents
4+
Extractor Class
55
---------------
66

7-
.. automodule:: selectorlib
7+
.. automodule:: selectorlib.selectorlib
88
:members: Extractor
99

1010

11+
Formatter Class
12+
---------------
1113

12-
Usage
13-
-----
14-
15-
To use selectorlib with requests:
16-
17-
>>> import requests
18-
>>> from selectorlib import Extractor
19-
>>> selector_yaml = """
20-
name:
21-
selector: h1.product_title
22-
price:
23-
selector: p.price
24-
stock:
25-
selector: p.stock
26-
tags:
27-
selector: span.tagged_as a
28-
short_description:
29-
selector: .woocommerce-product-details__short-description > p
30-
description:
31-
selector: div#tab-description p
32-
attributes:
33-
selector: table.shop_attributes
34-
multiple: True
35-
children:
36-
name:
37-
selector: th
38-
value:
39-
selector: td
40-
related_products:
41-
selector: li.product
42-
multiple: True
43-
children:
44-
name:
45-
selector: h2
46-
url:
47-
selector: a[href]
48-
price:
49-
selector: .price
50-
"""
51-
>>> extractor = Extractor.from_yaml_string(selector_yaml)
52-
>>> url = 'https://scrapeme.live/shop/Bulbasaur/'
53-
>>> response = requests.get(url)
54-
>>> selector.extract(response.text, base_url=response.url)
55-
{'attributes': [{'name': 'Weight', 'value': '15.2 kg'}],
56-
'description': 'Bulbasaur can be seen napping in bright sunlight. There is a '
57-
'seed on its back. By soaking up the sun’s rays, the seed '
58-
'grows progressively larger.',
59-
'name': 'Bulbasaur',
60-
'price': '£ 63.00',
61-
'related_products': [{'name': 'Pidgeot',
62-
'price': '£ 185.00',
63-
'url': 'Pidgeot £ 185.00'},
64-
{'name': 'Ekans',
65-
'price': '£ 55.00',
66-
'url': 'Ekans £ 55.00'},
67-
{'name': 'Charizard',
68-
'price': '£ 156.00',
69-
'url': 'Charizard £ 156.00'}],
70-
'short_description': 'Bulbasaur can be seen napping in bright sunlight. There '
71-
'is a seed on its back. By soaking up the sun’s rays, '
72-
'the seed grows progressively larger.',
73-
'stock': '45 in stock',
74-
'tags': 'bulbasaur'}
14+
.. automodule:: selectorlib.formatter
15+
:members: Formatter

‎docs/usage.rst‎

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
Usage
2+
======
3+
4+
To use selectorlib with requests:
5+
6+
>>> import requests
7+
>>> from selectorlib import Extractor
8+
>>> selector_yaml = """
9+
name:
10+
selector: h1.product_title
11+
price:
12+
selector: p.price
13+
stock:
14+
selector: p.stock
15+
tags:
16+
selector: span.tagged_as a
17+
short_description:
18+
selector: .woocommerce-product-details__short-description > p
19+
description:
20+
selector: div#tab-description p
21+
attributes:
22+
selector: table.shop_attributes
23+
multiple: True
24+
children:
25+
name:
26+
selector: th
27+
value:
28+
selector: td
29+
related_products:
30+
selector: li.product
31+
multiple: True
32+
children:
33+
name:
34+
selector: h2
35+
url:
36+
selector: a[href]
37+
price:
38+
selector: .price
39+
"""
40+
>>> extractor = Extractor.from_yaml_string(selector_yaml)
41+
>>> url = 'https://scrapeme.live/shop/Bulbasaur/'
42+
>>> response = requests.get(url)
43+
>>> extractor.extract(response.text, base_url=response.url)
44+
{'attributes': [{'name': 'Weight', 'value': '15.2 kg'}],
45+
'description': 'Bulbasaur can be seen napping in bright sunlight. There is a '
46+
'seed on its back. By soaking up the sun’s rays, the seed '
47+
'grows progressively larger.',
48+
'name': 'Bulbasaur',
49+
'price': '£ 63.00',
50+
'related_products': [{'name': 'Pidgeot',
51+
'price': '£ 185.00',
52+
'url': 'Pidgeot £ 185.00'},
53+
{'name': 'Ekans',
54+
'price': '£ 55.00',
55+
'url': 'Ekans £ 55.00'},
56+
{'name': 'Charizard',
57+
'price': '£ 156.00',
58+
'url': 'Charizard £ 156.00'}],
59+
'short_description': 'Bulbasaur can be seen napping in bright sunlight. There '
60+
'is a seed on its back. By soaking up the sun’s rays, '
61+
'the seed grows progressively larger.',
62+
'stock': '45 in stock',
63+
'tags': 'bulbasaur'}

‎selectorlib/__init__.py‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
__version__ = '0.10.0'
88

99
from .selectorlib import Extractor  # noqa:F401
10+
from .formatter import Formatter  # noqa:F401

‎selectorlib/formatter.py‎

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,32 @@
1-
import abc
1+
class Formatter:
2+
"""
3+
Inherit this class and override format function
4+
"""
25

3-
4-
class Format(abc.ABC):
5-
@abc.abstractmethod
66
def format(self, text: str):
7-
"""return text after formatting"""
7+
"""Override this function in inherited subclass. return text after formatting"""
8+
return text
89

910
@property
1011
def name(self):
1112
return self.__class__.__name__
1213

14+
@classmethod
15+
def get_all(cls):
16+
"""
17+
returns all subclasses inherited from Formatter
18+
19+
>>> formatters = Formatter.get_all()
20+
>>> Extractor.from_yaml_file('a.yaml', formatters=formatters)
21+
"""
22+
return cls.__subclasses__()
23+
1324

14-
class Integer(Format):
25+
class Integer(Formatter):
1526
def format(self, text):
1627
return int(text)
28+
29+
30+
class Decimal(Formatter):
31+
def format(self, text):
32+
return float(text)

‎selectorlib/selectorlib.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def from_yaml_string(cls, yaml_string: str, formatters=None):
4747
def from_yaml_file(cls, yaml_filename: str, formatters=None):
4848
"""create `Extractor` object from yaml file
4949
50-
>>> extractor = Extractor.from_yaml_string(yaml_filename='selectors.yaml')
50+
>>> extractor = Extractor.from_yaml_file('selectors.yaml')
5151
"""
5252
with open(yaml_filename) as yaml_fileobj:
5353
config = yaml.safe_load(yaml_fileobj.read())

‎tests/test_selectorlib.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def output_yaml():
3636

3737
def test_content(html, input_yaml, output_yaml):
3838
base_url = "https://scrapeme.live/shop/Bulbasaur/"
39-
formatters = [formatter.Integer]
39+
formatters = formatter.Formatter.get_all()
4040
selector = selectorlib.Extractor.from_yaml_string(input_yaml, formatters=formatters)
4141
output = selector.extract(html, base_url=base_url)
4242
assert output == yaml.safe_load(output_yaml)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp