@@ -376,7 +376,7 @@ def analyticsToDf(src):
376376analytic_rows = []
377377logsource_rows = []
378378analytic_to_ds_rows = []
379- failed_analytics = set ()
379+ failed_by_data_component = {}
380380
381381# analytics to detection strategies
382382analytic_to_ds_map = {}
@@ -392,19 +392,29 @@ def analyticsToDf(src):
392392
393393# Prints out errors where data components are not in the same domain as analytics
394394for analytic in tqdm (analytics ,desc = "parsing analytics" ):
395+ analytic_id = analytic .get ("id" )
395396for logsrc in analytic .get ("x_mitre_log_source_references" , []):
396397data_comp_id = logsrc .get ("x_mitre_data_component_ref" ,"" )
397398data_comp = src .get (data_comp_id )
398399try :
399400data_comp_attack_id = data_comp ["external_references" ][0 ]["external_id" ]
400401except (KeyError ,TypeError ,IndexError ,AttributeError ):
401- failed_analytics .add ((analytic ["id" ],data_comp_id ))
402-
403- if failed_analytics :
404- raise RuntimeError (
405- f"{ len (failed_analytics )} failures:\n " +
406- "\n " .join (f"analytic={ a } , data_component={ d } " for a ,d in sorted (failed_analytics ))
407- )
402+ if data_comp_id not in failed_by_data_component :
403+ failed_by_data_component [data_comp_id ]= []
404+ failed_by_data_component [data_comp_id ].append (analytic_id )
405+
406+ if failed_by_data_component :
407+ lines = ["Failures grouped by data component:\n " ]
408+ for dc_id in sorted (failed_by_data_component ):
409+ analytic_ids = sorted (set (failed_by_data_component [dc_id ]))
410+ dc_obj = src .get (dc_id )or {}
411+ dc_name = dc_obj .get ("name" ,"" )
412+
413+ lines .append (f"data_component={ dc_id } " + (f" ({ dc_name } )" if dc_name else "" ))
414+ lines .extend ([f" - analytic={ a } " for a in analytic_ids ])
415+ lines .append ("" )
416+
417+ raise RuntimeError ("\n " .join (lines ))
408418
409419for analytic in tqdm (analytics ,desc = "parsing analytics" ):
410420analytic_rows .append (parseBaseStix (analytic ))