|
25 | 25 |
|
26 | 26 |
|
def _check_page(url, opt):
  """Snapshots one page, diffs it against its previous snapshot, and prunes.

  Args:
    url: Address of the page to check.
    opt: Per-URL options from the roster; forwarded to diff_snapshots.

  Returns:
    The filtered diff text reported by diff_snapshots, or '' when the
    snapshot failed or there is no earlier snapshot to compare against.
  """
  out_dir = join(FLAGS.snapshot_dir, util.folder_name_from_url(url))
  if not snapshot(url, out_dir):
    return ''

  # Snapshot files are named by timestamp, so sorting the names orders them
  # from oldest to newest.
  snapshot_paths = sorted(glob(join(out_dir, '????_??_??_??_??_??.html')))

  delta = ''
  if len(snapshot_paths) > 1:
    delta = diff_snapshots(
        snapshot_paths[-2], snapshot_paths[-1], out_dir, opt)

  # Keep only the two newest snapshots to avoid storage explosion. The slice
  # is empty when there are two or fewer, so no length guard is needed.
  for stale_path in snapshot_paths[:-2]:
    remove(stale_path)

  return delta


def main(_):
  """Periodically checks every page in the roster and emails any changes.

  Loads the roster JSON named by FLAGS.roster_json, optionally clears
  FLAGS.snapshot_dir, then loops: whenever more than FLAGS.check_every has
  elapsed since the last pass, every URL is snapshotted and diffed, and the
  non-empty diffs are emailed. The loop ends once more than
  FLAGS.exit_after has elapsed since start (never, if that flag is None).

  Args:
    _: Unused positional arguments handed over by the app runner.
  """
  # Function-scope import: the rest of the file only needs `time`.
  from time import sleep

  # Longest single idle nap, assuming time() counts seconds (time.time).
  max_idle_sleep = 1.0

  exit_after = np.inf if FLAGS.exit_after is None else FLAGS.exit_after

  with open(FLAGS.roster_json, 'rb') as file_handle:
    roster = json.load(file_handle)

  start_t = time()
  last_check_t = 0  # Makes the very first loop iteration run a check.

  if FLAGS.clear_cached and exists(FLAGS.snapshot_dir):
    rmtree(FLAGS.snapshot_dir)

  while True:
    if time() - last_check_t > FLAGS.check_every:
      changed, deltas = [], []

      pbar = tqdm(roster.items())
      for url, opt in pbar:
        pbar.set_description(f'Checking{url}')
        delta = _check_page(url, opt)
        if delta != '':
          changed.append(url)
          deltas.append(delta)

      last_check_t = time()

      # Email myself the results.
      if changed:
        msg = ''.join(
            f'------\n{url}\n\n{delta}\n\n\n'
            for url, delta in zip(changed, deltas))
        util.email_oneself(msg, FLAGS.gmail, subject='Webpage Monitor')
        logging.info('Change detected; email sent')
      else:
        logging.info('No change detected')

    if time() - start_t > exit_after:
      break

    # Idle instead of busy-spinning: nap until the next check or the exit
    # deadline, whichever comes first, capped so the loop stays responsive.
    now = time()
    wait = min(
        max_idle_sleep,
        FLAGS.check_every - (now - last_check_t),
        exit_after - (now - start_t))
    if wait > 0:
      sleep(wait)
84 | 82 |
|
85 | 83 |
|
def diff_snapshots(html0_path, html1_path, out_dir, opt):
  """Diffs two HTML snapshots line by line and stores the filtered result.

  Args:
    html0_path: Path of the older snapshot.
    html1_path: Path of the newer snapshot.
    out_dir: Directory in which 'delta.html' is written.
    opt: Mapping of URL-specific options. Its optional 'ignore_prefix' entry
      may be None, a single string, or an iterable of strings; an added or
      removed line whose content (ignoring leading spaces) starts with any
      of them is dropped from the result.

  Returns:
    The remaining '+ ' / '- ' diff lines joined with newlines; '' when no
    reportable change is left.
  """
  # Parse URL-specific options into the tuple form str.startswith accepts.
  ignore_prefices = opt.get('ignore_prefix')
  if ignore_prefices is None:
    ignore_prefices = ()
  elif isinstance(ignore_prefices, str):
    ignore_prefices = (ignore_prefices,)
  else:
    ignore_prefices = tuple(ignore_prefices)

  # Diff the two HTMLs.
  html0_lines = util.read_file(html0_path).split('\n')
  html1_lines = util.read_file(html1_path).split('\n')

  kept_lines = []
  for line in difflib.ndiff(html0_lines, html1_lines):
    # Keep differences only (drops unchanged and '? ' hint lines).
    if not line.startswith(('+ ', '- ')):
      continue
    # Slice off exactly the two-character marker. str.lstrip('+ ') strips a
    # *set* of characters and would also eat '+', '-' or ' ' characters that
    # belong to the page content itself, so prefixes were matched against
    # the wrong text. Leading spaces are still ignored for matching.
    content = line[2:].lstrip(' ')
    if content.startswith(ignore_prefices):
      continue
    kept_lines.append(line)

  filtered_delta = '\n'.join(kept_lines)
  util.write_file(filtered_delta, join(out_dir, 'delta.html'))
  return filtered_delta
109 | 107 |
|
110 | 108 |
|
def snapshot(url, out_dir, timeout=30):
  """Downloads `url` and stores its HTML in a timestamped file in `out_dir`.

  Args:
    url: Address of the page to download.
    out_dir: Directory receiving the '<timestamp>.html' file; created when
      missing.
    timeout: Timeout handed to requests.get, so that an unresponsive server
      cannot stall the caller forever.

  Returns:
    True if the page was fetched and written, False if the request failed
    (the failure is logged and otherwise ignored).
  """
  try:
    request = requests.get(url, timeout=timeout)
  except requests.exceptions.ConnectionError:
    logging.warning('Connection Error:%s; ignored', url)
    return False
  except requests.exceptions.RequestException as err:
    # Timeouts, malformed URLs, redirect loops, etc.: skip this page rather
    # than letting one bad fetch bring the caller down.
    logging.warning('Request failed: %s (%s); ignored', url, err)
    return False

  # Pages that are not valid UTF-8 get replacement characters instead of
  # raising UnicodeDecodeError.
  html_src = request.content.decode(errors='replace')

  # exist_ok avoids the check-then-create race of exists() + makedirs().
  makedirs(out_dir, exist_ok=True)

  timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
  html_path = join(out_dir, timestamp + '.html')
  util.write_file(html_src, html_path)
  return True
124 | 122 |
|
125 | 123 |
|
if __name__ == '__main__':
  # Script entry point: hand `main` to the app runner, which calls it with
  # one positional argument (presumably absl.app, which would parse FLAGS
  # first -- confirm against the file's imports).
  app.run(main)