Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 14fd125

Browse files
committed
Diff HTML instead of screenshots
1 parent b04e8ed, commit 14fd125

File tree

6 files changed

+97
-136
lines changed

6 files changed

+97
-136
lines changed

‎.gitignore‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
roster.json
2-
gmail_app_pswd
1+
snapshots/
2+
33
.*.swp
44
.DS_Store
55

‎environment.yml‎

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,5 @@ name: webpage-monitor
22
dependencies:
33
-python=3.6
44
-numpy
5-
-scipy
6-
-pillow
75
-tqdm
6+
-requests

‎gmail_app_pswd‎

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
/Volumes/GoogleDrive/My Drive/root/work/github_private/webpage-monitor/gmail_app_pswd

‎roster.json‎

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
/Volumes/GoogleDrive/My Drive/root/work/github_private/webpage-monitor/roster.json

‎run.py‎

Lines changed: 61 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -5,141 +5,110 @@
55
fromglobimportglob
66
importjson
77
fromtimeimporttime
8+
importdatetime
9+
importdifflib
10+
importrequests
811
importnumpyasnp
9-
fromscipy.ndimageimportgaussian_filter
1012
fromtqdmimporttqdm
1113

1214
importutil
1315

14-
parser=argparse.ArgumentParser(description="Webpage monitor.")
16+
parser=argparse.ArgumentParser(description='Webpage monitor.')
1517
parser.add_argument('--roster_json',
1618
type=str,
1719
default='./roster.json',
18-
help="path to the roster")
20+
help='path to the roster')
1921
parser.add_argument('--check_every',
2022
type=int,
2123
default=43200,
22-
help="check every N seconds")
24+
help='check every N seconds')
2325
parser.add_argument('--exit_after',
2426
type=int,
2527
default=None,
26-
help="quit after N seconds")
27-
parser.add_argument('--tmp_dir',
28+
help='quit after N seconds')
29+
parser.add_argument('--snapshot_dir',
2830
type=str,
29-
default='/tmp/webpage-monitor',
30-
help="directory to dump screenshots for comparison")
31+
default='./snapshots',
32+
help='directory to dump screenshots for comparison')
3133
parser.add_argument('--clear_cached',
3234
action='store_true',
33-
help="whether to clear the screenshots on disk")
35+
help='whether to clear the screenshots on disk')
3436

3537

3638
defmain(args):
37-
ifargs.exit_afterisNone:
38-
exit_after=np.inf
39-
else:
40-
exit_after=args.exit_after
39+
exit_after=np.infifargs.exit_afterisNoneelseargs.exit_after
4140

42-
roster=load_roster(args.roster_json)
41+
withopen(args.roster_json,'rb')asfile_handle:
42+
roster=json.load(file_handle)
4343

4444
start_t=time()
4545
last_check_t=0
4646

47-
ifargs.clear_cachedandexists(args.tmp_dir):
48-
rmtree(args.tmp_dir)
47+
ifargs.clear_cachedandexists(args.snapshot_dir):
48+
rmtree(args.snapshot_dir)
4949

5050
whileTrue:
5151
iftime()-last_check_t>args.check_every:
5252
changed,deltas= [], []
5353

54-
forurl,optintqdm(roster.items(),desc="Checking URLs"):
55-
out_dir=join(args.tmp_dir,
56-
replace_special_char(url)).rstrip('/')
57-
58-
# Take screenshots
59-
screenshot(url,out_dir,opt)
60-
61-
pngs=sorted(glob(join(out_dir,'*.png')))
62-
63-
# Compare with previous screenshots
64-
iflen(pngs)>1:
65-
delta_png=out_dir+'_delta.png'
66-
delta=diff_screenshots(*pngs[-2:],delta_png)
67-
ifdeltaisnotNone:
54+
forurl,optintqdm(roster.items(),desc='Checking URLs'):
55+
# Snapshot the current webpage.
56+
out_dir=join(args.snapshot_dir,
57+
util.folder_name_from_url(url))
58+
snapshot(url,out_dir,opt)
59+
60+
# Compare with the previous snapshot.
61+
snapshot_paths=sorted(
62+
glob(join(out_dir,'????_??_??_??_??_??.html')))
63+
iflen(snapshot_paths)>1:
64+
delta=diff_snapshots(snapshot_paths[-2],
65+
snapshot_paths[-1],out_dir,opt)
66+
ifdelta!='':
6867
changed.append(url)
6968
deltas.append(delta)
7069

7170
# Remove earlier screenshots to avoid storage explosion
72-
iflen(pngs)>2:
73-
forfinpngs[:-2]:
74-
remove(f)
71+
iflen(snapshot_paths)>2:
72+
forxinsnapshot_paths[:-2]:
73+
remove(x)
7574

7675
last_check_t=time()
7776

7877
# Email myself the results
7978
ifchanged:
8079
msg=''
8180
forurl,deltainzip(changed,deltas):
82-
msg+="file://{delta}\n{url}\n\n".format(delta=delta,
83-
url=url)
84-
util.email_myself(msg,subject="Webpage Monitor")
85-
util.format_print("Change detected; email sent",'header')
86-
87-
iftime()-start_t>exit_after:
88-
break
89-
90-
91-
defdiff_screenshots(old_png,
92-
new_png,
93-
delta_png,
94-
pix_diff_thres=0.1,
95-
n_diff_thres=16,
96-
unchanged_alpha=0.2,
97-
diff_blur_sigma=4):
98-
old=util.imread_arr(old_png)
99-
new=util.imread_arr(new_png)
100-
101-
# Sizes are even different
102-
ifold.shape!=new.shape:
103-
util.imwrite_arr(new,delta_png)
104-
returndelta_png
105-
106-
# Check content
107-
pixel_is_diff=np.abs(old-new)>=pix_diff_thres# (H, W, 3)
108-
pixel_is_diff=np.sum(pixel_is_diff,axis=2)>0
109-
110-
# Not enough different pixels for a change
111-
ifnp.sum(pixel_is_diff)<=n_diff_thres:
112-
returnNone
113-
114-
# Highlight the changed part
115-
alpha=unchanged_alpha*np.ones_like(new)
116-
alpha[np.dstack([pixel_is_diff]*3)]=1
117-
alpha=gaussian_filter(alpha,diff_blur_sigma)
118-
delta=alpha*new+ (1-alpha)*np.zeros_like(new)
119-
util.imwrite_arr(delta,delta_png)
120-
returndelta_png
121-
122-
123-
defscreenshot(url,out_dir,opt,width=512,delay=3):
81+
msg+=f'{url}\n{delta}\n\n'
82+
util.email_myself(msg,subject='Webpage Monitor')
83+
util.format_print('Change detected; email sent','header')
84+
85+
iftime()-start_t>exit_after:
86+
break
87+
88+
89+
defdiff_snapshots(html0_path,html1_path,out_dir,opt):
90+
# TODO: Handle opt (page-specific special options)
91+
html0_content=util.read_file(html0_path)
92+
html1_content=util.read_file(html1_path)
93+
delta=difflib.ndiff(html0_content.split('\n'),html1_content.split('\n'))
94+
# Keep differences only.
95+
delta='\n'.join(xforxindelta
96+
ifx.startswith('+ ')orx.startswith('- '))
97+
delta_path=join(out_dir,'delta.html')
98+
util.write_file(delta,delta_path)
99+
returndelta
100+
101+
102+
defsnapshot(url,out_dir,opt):
103+
# TODO: Ditto
104+
request=requests.get(url)
105+
print(url)
106+
html_src=request.content.decode()
124107
ifnotexists(out_dir):
125108
makedirs(out_dir)
126-
127-
cmd= ('webkit2png --fullsize --no-images --ignore-ssl-check --width={w} '
128-
'--delay={delay} --dir={dir_} --filename={t} {url}').format(
129-
w=width,delay=delay,dir_=out_dir,t=time(),url=url)
130-
util.call(cmd,silence_stdout=True)
131-
132-
133-
defload_roster(roster_json):
134-
withopen(roster_json,'r')ash:
135-
roster=json.load(h)
136-
returnroster
137-
138-
139-
defreplace_special_char(url):
140-
returnurl.replace('/','_').replace('?',
141-
'_').replace('&',
142-
'_').replace(':','_')
109+
timestamp=datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
110+
html_path=join(out_dir,timestamp+'.html')
111+
util.write_file(html_src,html_path)
143112

144113

145114
if__name__=='__main__':

‎util.py‎

Lines changed: 31 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,44 +1,34 @@
1-
fromsubprocessimportPopen,PIPE
21
fromemail.mime.textimportMIMEText
32
importsmtplib
4-
importnumpyasnp
5-
fromPILimportImage
63

74

8-
defimread_arr(path):
9-
im=Image.open(path)
10-
arr=np.array(im)[:, :, :3]
11-
arr=arr.astype(float)/np.iinfo(arr.dtype).max
12-
returnarr
5+
defread_file(path):
6+
withopen(path,'rb')asfile_handle:
7+
content=file_handle.read()
8+
returncontent.decode()
139

1410

15-
defimwrite_arr(arr,path,dtype='uint8'):
16-
arr= (arr*np.iinfo(dtype).max).astype(dtype)
17-
im=Image.fromarray(arr)
18-
im.save(path)
11+
defwrite_file(str_,path):
12+
withopen(path,'wb')asfile_handle:
13+
file_handle.write(str_.encode())
1914

2015

21-
defcall(cmd,cwd=None,silence_stdout=False):
22-
process=Popen(cmd,stdout=PIPE,stderr=PIPE,cwd=cwd,shell=True)
16+
deffolder_name_from_url(url):
17+
folder_name=url.rstrip('/')
18+
folder_name=folder_name.replace('http://','').replace('https://','')
19+
folder_name=folder_name.replace('/','_')
20+
folder_name=folder_name.replace('?','_')
21+
folder_name=folder_name.replace('&','_')
22+
folder_name=folder_name.replace(':','_')
23+
returnfolder_name
2324

24-
stdout,stderr=process.communicate()# waits for completion
25-
stdout,stderr=stdout.decode(),stderr.decode()
2625

27-
ifnotsilence_stdout:
28-
ifstdout!='':
29-
format_print(stdout,'okblue')
30-
31-
ifstderr!='':
32-
format_print(cmd,'warn')
33-
format_print(stderr,'fail')
34-
35-
36-
defemail_myself(
37-
msg,subject="Untitled",email='xiuming6zhang@gmail.com',
38-
pswd_path='./gmail_app_pswd'):
39-
withopen(pswd_path,'r')ash:
40-
pswd=h.readlines()
41-
pswd=pswd[0].strip()
26+
defemail_myself(msg,
27+
subject='Untitled',
28+
email='xiuming6zhang@gmail.com',
29+
pswd_path='./gmail_app_pswd'):
30+
pswd=read_file(pswd_path)
31+
pswd=pswd.strip()
4232

4333
to_emails= [email]
4434
from_email=email
@@ -56,33 +46,34 @@ def email_myself(
5646

5747

5848
defformat_print(msg,fmt):
59-
"""Prints a message with format.
49+
'''Prints a message with format.
6050
Args:
6151
msg (str): Message to print.
6252
fmt (str): Format; try your luck with any value -- don't worry; if
6353
it's illegal, you will be prompted with all legal values.
6454
Raises:
6555
ValueError: If the input format is illegal.
66-
"""
56+
'''
6757
fmt_strs= {
6858
'header':'\033[95m',
6959
'warn':'\033[93m',
7060
'fail':'\033[91m',
7161
'bold':'\033[1m',
72-
'underline':'\033[4m'}
62+
'underline':'\033[4m'
63+
}
7364

7465
iffmtinfmt_strs.keys():
7566
start_str=fmt_strs[fmt]
7667
end_str='\033[0m'
7768

7869
eliflen(fmt)==1:
79-
start_str="\n<"+"".join([fmt]*78)+'\n\n'# as per PEP8
80-
end_str='\n'+start_str[2:-2]+">\n"
70+
start_str='\n<'+''.join([fmt]*78)+'\n\n'# as per PEP8
71+
end_str='\n'+start_str[2:-2]+'>\n'
8172

8273
else:
83-
raiseValueError((
84-
"Legal values for fmt: %s, plus any single character"
85-
"(which will be repeated into the line separator),"
86-
"but input is'%s'")% (list(fmt_strs.keys()),fmt))
74+
raiseValueError(
75+
('Legal values for fmt: %s, plus any single character'
76+
'(which will be repeated into the line separator),'
77+
'but input is %s')% (list(fmt_strs.keys()),fmt))
8778

8879
print(start_str+msg+end_str)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp