"""Prompt for a URL, fetch the page, and report the links and emails in it.

Prints the number of quoted http(s)/ftp(s) links found in the page text,
followed by every email-like string found, one per line.
"""

import re

import requests

# Get the URL. The scheme must be typed by the user because the string is
# handed to requests.get() unchanged.
url = input('Enter a URL (include `http://`): ')

# Connect to the URL. The timeout keeps an unresponsive host from blocking
# the script forever (requests applies no timeout by default).
website = requests.get(url, timeout=30)

# Read the response body as text.
html = website.text

# Use re.findall to grab all the double-quoted http(s)/ftp(s) links.
# The pattern has two capture groups, so each match is a
# (full_url, scheme) tuple; only the number of matches is used below.
links = re.findall(r'"((http|ftp)s?://.*?)"', html)

# Email-like tokens: word characters, dots, or commas on both sides of '@',
# ending in a dot followed by word characters. Raw string so the regex
# escapes (\w, \.) are not treated as string escapes.
emails = re.findall(r'([\w\.,]+@[\w\.,]+\.\w+)', html)

# Print the number of links in the list.
print("\nFound {} links".format(len(links)))

# Print every email-like match on its own line.
for email in emails:
    print(email)