Commit a1f45ff

committed

Add a script to lift CERT risk assessment tags from help files

1 parent 12df863 · commit a1f45ff · Copy full SHA for a1f45ff

File tree

1 file changed

+160

-0

lines changed

scripts
- add_risk_assessment_tags.py

1 file changed

+160

-0

lines changed

`‎scripts/add_risk_assessment_tags.py‎`

Lines changed: 160 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,160 @@`
	`1`	`+#!/usr/bin/env python3`
	`2`	`+"""`
	`3`	`+Add risk assessment tags to rule package JSON files.`
	`4`	`+`
	`5`	`+This script:`
	`6`	`+1. Iterates through each JSON file in rule_packages directory`
	`7`	`+2. Looks for CERT-C or CERT-C++ sections`
	`8`	`+3. For each rule, finds the corresponding markdown file`
	`9`	`+4. Extracts risk assessment data from the markdown file`
	`10`	`+5. Adds risk assessment data as tags to each query in the JSON file`
	`11`	`+"""`
	`12`	`+`
	`13`	`+importos`
	`14`	`+importjson`
	`15`	`+importre`
	`16`	`+importglob`
	`17`	`+frombs4importBeautifulSoup`
	`18`	`+importlogging`
	`19`	`+`
	`20`	`+logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(levelname)s - %(message)s')`
	`21`	`+logger=logging.getLogger(__name__)`
	`22`	`+`
	`23`	`+deffind_rule_packages():`
	`24`	`+"""Find all JSON rule package files in the rule_packages directory."""`
	`25`	`+repo_root=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))`
	`26`	`+rule_packages_dir=os.path.join(repo_root,"rule_packages")`
	`27`	`+returnglob.glob(os.path.join(rule_packages_dir,"*",".json"),recursive=True)`
	`28`	`+`
	`29`	`+defextract_risk_assessment_from_md(md_file_path):`
	`30`	`+"""Extract risk assessment data from the markdown file."""`
	`31`	`+risk_data= {}`
	`32`	`+`
	`33`	`+try:`
	`34`	`+withopen(md_file_path,'r',encoding='utf-8')asf:`
	`35`	`+content=f.read()`
	`36`	`+`
	`37`	`+# Find the Risk Assessment section`
	`38`	`+risk_section_match=re.search(r'## Risk Assessment(.*?)##',content,re.DOTALL)`
	`39`	`+ifnotrisk_section_match:`
	`40`	`+# Try to find it as the last section`
	`41`	`+risk_section_match=re.search(r'## Risk Assessment(.*?)$',content,re.DOTALL)`
	`42`	`+ifnotrisk_section_match:`
	`43`	`+logger.warning(f"No Risk Assessment section found in{md_file_path}")`
	`44`	`+returnrisk_data`
	`45`	`+`
	`46`	`+risk_section=risk_section_match.group(1)`
	`47`	`+`
	`48`	`+# Look for the table with risk assessment data`
	`49`	`+table_match=re.search(r'<table>(.*?)</table>',risk_section,re.DOTALL)`
	`50`	`+ifnottable_match:`
	`51`	`+logger.warning(f"No risk assessment table found in{md_file_path}")`
	`52`	`+returnrisk_data`
	`53`	`+`
	`54`	`+table_html=table_match.group(0)`
	`55`	`+soup=BeautifulSoup(table_html,'html.parser')`
	`56`	`+`
	`57`	`+# Find all rows in the table`
	`58`	`+rows=soup.find_all('tr')`
	`59`	`+iflen(rows)<2:# Need at least header and data row`
	`60`	`+logger.warning(f"Incomplete risk assessment table in{md_file_path}")`
	`61`	`+returnrisk_data`
	`62`	`+`
	`63`	`+# Extract headers and values`
	`64`	`+headers= [th.get_text().strip()forthinrows[0].find_all('th')]`
	`65`	`+values= [td.get_text().strip()fortdinrows[1].find_all('td')]`
	`66`	`+`
	`67`	`+# Create a dictionary of headers and values`
	`68`	`+iflen(headers)==len(values):`
	`69`	`+fori,headerinenumerate(headers):`
	`70`	`+risk_data[header]=values[i]`
	`71`	`+else:`
	`72`	`+logger.warning(f"Header and value count mismatch in{md_file_path}")`
	`73`	`+`
	`74`	`+exceptExceptionase:`
	`75`	`+logger.error(f"Error extracting risk assessment from{md_file_path}:{e}")`
	`76`	`+`
	`77`	`+returnrisk_data`
	`78`	`+`
	`79`	`+deffind_md_file(rule_id,short_name,language):`
	`80`	`+"""Find the markdown file for the given rule ID and short name."""`
	`81`	`+repo_root=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))`
	`82`	`+md_path=os.path.join(repo_root,language,"cert","src","rules",rule_id,f"{short_name}.md")`
	`83`	`+`
	`84`	`+ifos.path.exists(md_path):`
	`85`	`+returnmd_path`
	`86`	`+else:`
	`87`	`+# Try without short name (sometimes the file is named after the rule ID)`
	`88`	`+md_path=os.path.join(repo_root,language,"cert","src","rules",rule_id,f"{rule_id}.md")`
	`89`	`+ifos.path.exists(md_path):`
	`90`	`+returnmd_path`
	`91`	`+else:`
	`92`	`+logger.warning(f"Could not find markdown file for{language} rule{rule_id} ({short_name})")`
	`93`	`+returnNone`
	`94`	`+`
	`95`	`+defprocess_rule_package(rule_package_file):`
	`96`	`+"""Process a single rule package JSON file."""`
	`97`	`+try:`
	`98`	`+withopen(rule_package_file,'r',encoding='utf-8')asf:`
	`99`	`+data=json.load(f)`
	`100`	`+`
	`101`	`+modified=False`
	`102`	`+`
	`103`	`+# Look for CERT-C and CERT-CPP sections`
	`104`	`+forcert_keyin ["CERT-C","CERT-C++"]:`
	`105`	`+ifcert_keyindata:`
	`106`	`+language="c"ifcert_key=="CERT-C"else"cpp"`
	`107`	`+`
	`108`	`+# Process each rule in the CERT section`
	`109`	`+forrule_id,rule_dataindata[cert_key].items():`
	`110`	`+if"queries"inrule_data:`
	`111`	`+forqueryinrule_data["queries"]:`
	`112`	`+if"short_name"inquery:`
	`113`	`+md_file=find_md_file(rule_id,query["short_name"],language)`
	`114`	`+`
	`115`	`+ifmd_file:`
	`116`	`+risk_data=extract_risk_assessment_from_md(md_file)`
	`117`	`+`
	`118`	`+ifrisk_data:`
	`119`	`+# Add risk assessment data as tags`
	`120`	`+if"tags"notinquery:`
	`121`	`+query["tags"]= []`
	`122`	`+`
	`123`	`+# Add each risk assessment property as a tag`
	`124`	`+forkey,valueinrisk_data.items():`
	`125`	`+key_sanitized=key.lower().replace(" ","-")`
	`126`	`+ifkey_sanitized=="rule":`
	`127`	`+# skip rule, as that is already in the rule ID`
	`128`	`+continue`
	`129`	`+tag=f"external/cert/{key_sanitized}/{value.lower()}"`
	`130`	`+iftagnotinquery["tags"]:`
	`131`	`+query["tags"].append(tag)`
	`132`	`+modified=True`
	`133`	`+logger.info(f"Added tag{tag} to{rule_id} ({query['short_name']})")`
	`134`	`+`
	`135`	`+# Save the modified data back to the file if any changes were made`
	`136`	`+ifmodified:`
	`137`	`+withopen(rule_package_file,'w',encoding='utf-8')asf:`
	`138`	`+json.dump(data,f,indent=2)`
	`139`	`+logger.info(f"Updated{rule_package_file}")`
	`140`	`+else:`
	`141`	`+logger.info(f"No changes made to{rule_package_file}")`
	`142`	`+`
	`143`	`+exceptExceptionase:`
	`144`	`+logger.error(f"Error processing{rule_package_file}:{e}")`
	`145`	`+`
	`146`	`+defmain():`
	`147`	`+"""Main function to process all rule packages."""`
	`148`	`+logger.info("Starting risk assessment tag addition process")`
	`149`	`+`
	`150`	`+rule_packages=find_rule_packages()`
	`151`	`+logger.info(f"Found{len(rule_packages)} rule package files")`
	`152`	`+`
	`153`	`+forrule_packageinrule_packages:`
	`154`	`+logger.info(f"Processing{rule_package}")`
	`155`	`+process_rule_package(rule_package)`
	`156`	`+`
	`157`	`+logger.info("Completed risk assessment tag addition process")`
	`158`	`+`
	`159`	`+if__name__=="__main__":`
	`160`	`+main()`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commita1f45ff

File tree

1 file changed

1 file changed

`‎scripts/add_risk_assessment_tags.py‎`

0 commit comments