22"""
33PostgreSQL Reports Generator using PromQL
44
5- This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007, H001, F005, F004)
5+ This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007, H001, F005, F004, K001, K003)
66by querying Prometheus metrics using PromQL queries.
77"""
88
@@ -428,6 +428,325 @@ def generate_f004_heap_bloat_report(self, cluster: str = "local", node_name: str
428428"total_bloat_size_pretty" :self .format_bytes (total_bloat_size )
429429 },node_name )
430430
431+ def generate_k001_query_calls_report (self ,cluster :str = "local" ,node_name :str = "node-01" ,
432+ time_range_minutes :int = 60 )-> Dict [str ,Any ]:
433+ """
434+ Generate K001 Globally Aggregated Query Metrics report (sorted by calls).
435+
436+ Args:
437+ cluster: Cluster name
438+ node_name: Node name
439+ time_range_minutes: Time range in minutes for metrics collection
440+
441+ Returns:
442+ Dictionary containing query metrics sorted by calls
443+ """
444+ print ("Generating K001 Globally Aggregated Query Metrics report..." )
445+
446+ # Calculate time range
447+ end_time = datetime .now ()
448+ start_time = end_time - timedelta (minutes = time_range_minutes )
449+
450+ # Get pg_stat_statements metrics using the same logic as CSV endpoint
451+ query_metrics = self ._get_pgss_metrics_data (cluster ,node_name ,start_time ,end_time )
452+
453+ # Sort by calls (descending)
454+ sorted_metrics = sorted (query_metrics ,key = lambda x :x .get ('calls' ,0 ),reverse = True )
455+
456+ # Calculate totals
457+ total_calls = sum (q .get ('calls' ,0 )for q in sorted_metrics )
458+ total_time = sum (q .get ('total_time' ,0 )for q in sorted_metrics )
459+ total_rows = sum (q .get ('rows' ,0 )for q in sorted_metrics )
460+
461+ return self .format_report_data ("K001" , {
462+ "query_metrics" :sorted_metrics ,
463+ "summary" : {
464+ "total_queries" :len (sorted_metrics ),
465+ "total_calls" :total_calls ,
466+ "total_time_ms" :total_time ,
467+ "total_rows" :total_rows ,
468+ "time_range_minutes" :time_range_minutes ,
469+ "start_time" :start_time .isoformat (),
470+ "end_time" :end_time .isoformat ()
471+ }
472+ },node_name )
473+
474+ def generate_k003_top_queries_report (self ,cluster :str = "local" ,node_name :str = "node-01" ,
475+ time_range_minutes :int = 60 ,limit :int = 50 )-> Dict [str ,Any ]:
476+ """
477+ Generate K003 Top-50 Queries by total_time report.
478+
479+ Args:
480+ cluster: Cluster name
481+ node_name: Node name
482+ time_range_minutes: Time range in minutes for metrics collection
483+ limit: Number of top queries to return (default: 50)
484+
485+ Returns:
486+ Dictionary containing top queries sorted by total execution time
487+ """
488+ print ("Generating K003 Top-50 Queries by total_time report..." )
489+
490+ # Calculate time range
491+ end_time = datetime .now ()
492+ start_time = end_time - timedelta (minutes = time_range_minutes )
493+
494+ # Get pg_stat_statements metrics using the same logic as CSV endpoint
495+ query_metrics = self ._get_pgss_metrics_data (cluster ,node_name ,start_time ,end_time )
496+
497+ # Sort by total_time (descending) and limit to top N
498+ sorted_metrics = sorted (query_metrics ,key = lambda x :x .get ('total_time' ,0 ),reverse = True )[:limit ]
499+
500+ # Calculate totals for the top queries
501+ total_calls = sum (q .get ('calls' ,0 )for q in sorted_metrics )
502+ total_time = sum (q .get ('total_time' ,0 )for q in sorted_metrics )
503+ total_rows = sum (q .get ('rows' ,0 )for q in sorted_metrics )
504+
505+ return self .format_report_data ("K003" , {
506+ "top_queries" :sorted_metrics ,
507+ "summary" : {
508+ "queries_returned" :len (sorted_metrics ),
509+ "total_calls" :total_calls ,
510+ "total_time_ms" :total_time ,
511+ "total_rows" :total_rows ,
512+ "time_range_minutes" :time_range_minutes ,
513+ "start_time" :start_time .isoformat (),
514+ "end_time" :end_time .isoformat (),
515+ "limit" :limit
516+ }
517+ },node_name )
518+
519+ def _get_pgss_metrics_data (self ,cluster :str ,node_name :str ,start_time :datetime ,end_time :datetime )-> List [Dict [str ,Any ]]:
520+ """
521+ Get pg_stat_statements metrics data between two time points.
522+ Adapted from the logic in flask-backend/app.py get_pgss_metrics_csv().
523+
524+ Args:
525+ cluster: Cluster name
526+ node_name: Node name
527+ start_time: Start datetime
528+ end_time: End datetime
529+
530+ Returns:
531+ List of query metrics with calculated differences
532+ """
533+ # Metric name mapping for cleaner output
534+ METRIC_NAME_MAPPING = {
535+ 'calls' :'calls' ,
536+ 'exec_time_total' :'total_time' ,
537+ 'rows' :'rows' ,
538+ 'shared_bytes_hit_total' :'shared_blks_hit' ,
539+ 'shared_bytes_read_total' :'shared_blks_read' ,
540+ 'shared_bytes_dirtied_total' :'shared_blks_dirtied' ,
541+ 'shared_bytes_written_total' :'shared_blks_written' ,
542+ 'block_read_total' :'blk_read_time' ,
543+ 'block_write_total' :'blk_write_time'
544+ }
545+
546+ # Build filters
547+ filters = [f'cluster="{ cluster } "' ,f'node_name="{ node_name } "' ]
548+ filter_str = '{' + ',' .join (filters )+ '}'
549+
550+ # Get all pg_stat_statements metrics
551+ all_metrics = [
552+ 'pgwatch_pg_stat_statements_calls' ,
553+ 'pgwatch_pg_stat_statements_exec_time_total' ,
554+ 'pgwatch_pg_stat_statements_rows' ,
555+ 'pgwatch_pg_stat_statements_shared_bytes_hit_total' ,
556+ 'pgwatch_pg_stat_statements_shared_bytes_read_total' ,
557+ 'pgwatch_pg_stat_statements_shared_bytes_dirtied_total' ,
558+ 'pgwatch_pg_stat_statements_shared_bytes_written_total' ,
559+ 'pgwatch_pg_stat_statements_block_read_total' ,
560+ 'pgwatch_pg_stat_statements_block_write_total'
561+ ]
562+
563+ # Get metrics at start and end times
564+ start_data = []
565+ end_data = []
566+
567+ for metric in all_metrics :
568+ metric_with_filters = f'{ metric } { filter_str } '
569+
570+ try :
571+ # Query metrics around start time - use instant queries at specific timestamps
572+ start_result = self .query_range (metric_with_filters ,start_time - timedelta (minutes = 1 ),start_time + timedelta (minutes = 1 ))
573+ if start_result :
574+ start_data .extend (start_result )
575+
576+ # Query metrics around end time
577+ end_result = self .query_range (metric_with_filters ,end_time - timedelta (minutes = 1 ),end_time + timedelta (minutes = 1 ))
578+ if end_result :
579+ end_data .extend (end_result )
580+
581+ except Exception as e :
582+ print (f"Warning: Failed to query metric{ metric } :{ e } " )
583+ continue
584+
585+ # Process the data to calculate differences
586+ return self ._process_pgss_data (start_data ,end_data ,start_time ,end_time ,METRIC_NAME_MAPPING )
587+
588+ def query_range (self ,query :str ,start_time :datetime ,end_time :datetime ,step :str = "30s" )-> List [Dict [str ,Any ]]:
589+ """
590+ Execute a range PromQL query.
591+
592+ Args:
593+ query: PromQL query string
594+ start_time: Start time
595+ end_time: End time
596+ step: Query step interval
597+
598+ Returns:
599+ List of query results
600+ """
601+ params = {
602+ 'query' :query ,
603+ 'start' :start_time .timestamp (),
604+ 'end' :end_time .timestamp (),
605+ 'step' :step
606+ }
607+
608+ try :
609+ response = requests .get (f"{ self .base_url } /query_range" ,params = params )
610+ if response .status_code == 200 :
611+ result = response .json ()
612+ if result .get ('status' )== 'success' :
613+ return result .get ('data' , {}).get ('result' , [])
614+ else :
615+ print (f"Range query failed with status{ response .status_code } :{ response .text } " )
616+ except Exception as e :
617+ print (f"Range query error:{ e } " )
618+
619+ return []
620+
621+ def _process_pgss_data (self ,start_data :List [Dict ],end_data :List [Dict ],
622+ start_time :datetime ,end_time :datetime ,
623+ metric_mapping :Dict [str ,str ])-> List [Dict [str ,Any ]]:
624+ """
625+ Process pg_stat_statements data and calculate differences between start and end times.
626+ Adapted from the logic in flask-backend/app.py process_pgss_data().
627+ """
628+ # Convert Prometheus data to dictionaries
629+ start_metrics = self ._prometheus_to_dict (start_data ,start_time )
630+ end_metrics = self ._prometheus_to_dict (end_data ,end_time )
631+
632+ if not start_metrics and not end_metrics :
633+ return []
634+
635+ # Create a combined dictionary with all unique query identifiers
636+ all_keys = set ()
637+ all_keys .update (start_metrics .keys ())
638+ all_keys .update (end_metrics .keys ())
639+
640+ result_rows = []
641+
642+ # Calculate differences for each query
643+ for key in all_keys :
644+ start_metric = start_metrics .get (key , {})
645+ end_metric = end_metrics .get (key , {})
646+
647+ # Extract identifier components from key
648+ db_name ,query_id ,user ,instance = key
649+
650+ # Calculate actual duration from metric timestamps
651+ start_timestamp = start_metric .get ('timestamp' )
652+ end_timestamp = end_metric .get ('timestamp' )
653+
654+ if start_timestamp and end_timestamp :
655+ start_dt = datetime .fromisoformat (start_timestamp )
656+ end_dt = datetime .fromisoformat (end_timestamp )
657+ actual_duration = (end_dt - start_dt ).total_seconds ()
658+ else :
659+ # Fallback to query parameter duration if timestamps are missing
660+ actual_duration = (end_time - start_time ).total_seconds ()
661+
662+ # Create result row
663+ row = {
664+ 'queryid' :query_id ,
665+ 'database' :db_name ,
666+ 'user' :user ,
667+ 'duration_seconds' :actual_duration
668+ }
669+
670+ # Numeric columns to calculate differences for (using original metric names)
671+ numeric_cols = list (metric_mapping .keys ())
672+
673+ # Calculate differences and rates
674+ for col in numeric_cols :
675+ start_val = start_metric .get (col ,0 )
676+ end_val = end_metric .get (col ,0 )
677+ diff = end_val - start_val
678+
679+ # Use simplified display name
680+ display_name = metric_mapping [col ]
681+
682+ # Convert bytes to blocks for block-related metrics (PostgreSQL uses 8KB blocks)
683+ if 'blks' in display_name and 'bytes' in col :
684+ diff = diff / 8192 # Convert bytes to 8KB blocks
685+
686+ row [display_name ]= diff
687+
688+ # Calculate rates per second
689+ if row ['duration_seconds' ]> 0 :
690+ row [f'{ display_name } _per_sec' ]= diff / row ['duration_seconds' ]
691+ else :
692+ row [f'{ display_name } _per_sec' ]= 0
693+
694+ # Calculate per-call averages
695+ calls_diff = row .get ('calls' ,0 )
696+ if calls_diff > 0 :
697+ row [f'{ display_name } _per_call' ]= diff / calls_diff
698+ else :
699+ row [f'{ display_name } _per_call' ]= 0
700+
701+ result_rows .append (row )
702+
703+ return result_rows
704+
705+ def _prometheus_to_dict (self ,prom_data :List [Dict ],timestamp :datetime )-> Dict :
706+ """
707+ Convert Prometheus API response to dictionary keyed by query identifiers.
708+ Adapted from the logic in flask-backend/app.py prometheus_to_dict().
709+ """
710+ if not prom_data :
711+ return {}
712+
713+ metrics_dict = {}
714+
715+ for metric_data in prom_data :
716+ metric = metric_data .get ('metric' , {})
717+ values = metric_data .get ('values' , [])
718+
719+ if not values :
720+ continue
721+
722+ # Get the closest value to our timestamp
723+ closest_value = min (values ,key = lambda x :abs (float (x [0 ])- timestamp .timestamp ()))
724+
725+ # Create unique key for this query
726+ key = (
727+ metric .get ('datname' ,'' ),
728+ metric .get ('queryid' ,'' ),
729+ metric .get ('user' ,'' ),
730+ metric .get ('instance' ,'' )
731+ )
732+
733+ # Initialize metric dict if not exists
734+ if key not in metrics_dict :
735+ metrics_dict [key ]= {
736+ 'timestamp' :datetime .fromtimestamp (float (closest_value [0 ])).isoformat (),
737+ }
738+
739+ # Add metric value
740+ metric_name = metric .get ('__name__' ,'pgwatch_pg_stat_statements_calls' )
741+ clean_name = metric_name .replace ('pgwatch_pg_stat_statements_' ,'' )
742+
743+ try :
744+ metrics_dict [key ][clean_name ]= float (closest_value [1 ])
745+ except (ValueError ,IndexError ):
746+ metrics_dict [key ][clean_name ]= 0
747+
748+ return metrics_dict
749+
431750def format_bytes (self ,bytes_value :float )-> str :
432751"""Format bytes value for human readable display."""
433752if bytes_value == 0 :
@@ -619,6 +938,8 @@ def generate_all_reports(self, cluster: str = "local", node_name: str = "node-01
619938reports ['H001' ]= self .generate_h001_invalid_indexes_report (cluster ,node_name )
620939reports ['F005' ]= self .generate_f005_btree_bloat_report (cluster ,node_name )
621940reports ['F004' ]= self .generate_f004_heap_bloat_report (cluster ,node_name )
941+ reports ['K001' ]= self .generate_k001_query_calls_report (cluster ,node_name )
942+ reports ['K003' ]= self .generate_k003_top_queries_report (cluster ,node_name )
622943
623944return reports
624945def create_report (self ,api_url ,token ,project ,epoch ):
@@ -673,7 +994,7 @@ def main():
673994help = 'Cluster name (default: local)' )
674995parser .add_argument ('--node-name' ,default = 'node-01' ,
675996help = 'Node name (default: node-01)' )
676- parser .add_argument ('--check-id' ,choices = ['A002' ,'A003' ,'A004' ,'A007' ,'H001' ,'F005' ,'F004' ,'ALL' ],
997+ parser .add_argument ('--check-id' ,choices = ['A002' ,'A003' ,'A004' ,'A007' ,'H001' ,'F005' ,'F004' ,'K001' , 'K003' , 'ALL' ],
677998help = 'Specific check ID to generate (default: ALL)' )
678999parser .add_argument ('--output' ,default = '-' ,
6791000help = 'Output file (default: stdout)' )
@@ -704,7 +1025,6 @@ def main():
7041025if not args .no_upload :
7051026generator .upload_report_file (args .api_url ,args .token ,report_id ,f"{ report } .json" )
7061027if args .output == '-' :
707-
7081028pass
7091029else :
7101030with open (args .output ,'w' )as f :
@@ -726,6 +1046,10 @@ def main():
7261046report = generator .generate_f005_btree_bloat_report (args .cluster ,args .node_name )
7271047elif args .check_id == 'F004' :
7281048report = generator .generate_f004_heap_bloat_report (args .cluster ,args .node_name )
1049+ elif args .check_id == 'K001' :
1050+ report = generator .generate_k001_query_calls_report (args .cluster ,args .node_name )
1051+ elif args .check_id == 'K003' :
1052+ report = generator .generate_k003_top_queries_report (args .cluster ,args .node_name )
7291053
7301054if args .output == '-' :
7311055print (json .dumps (report ,indent = 2 ))