2525 ES_LOCAL_API_KEY - API key for authentication (optional, enables API key auth)
2626 ES_LOCAL_USERNAME - Username for basic authentication (optional)
2727 ES_LOCAL_PASSWORD - Password for basic authentication (optional)
28+ ES_BULK_DELAY - Delay in seconds between bulk indexing chunks (optional, default: 1; 0 disables the delay)
2829
2930Requirements:
3031 - Python 3.11+
3536import sys
3637import json
3738import os
39+ import time
3840from pathlib import Path
3941from elasticsearch import Elasticsearch ,helpers
4042from elasticsearch .exceptions import ConnectionError ,RequestError
@@ -399,9 +401,18 @@ def sarif_results_generator(sarif_files, index_name):
399401 )
400402
401403
402- def index_sarif_files (sarif_files ,index_name ,host ,api_key = None ,username = None ,password = None ):
404+ def index_sarif_files (sarif_files ,index_name ,host ,api_key = None ,username = None ,password = None , bulk_delay = 1 ):
403405"""
404406 Connect to Elasticsearch and bulk index all SARIF results with progress logging.
407+
408+ Args:
409+ sarif_files: List of SARIF file paths to index
410+ index_name: Name of the Elasticsearch index to create
411+ host: Elasticsearch host URL
412+ api_key: Optional API key for authentication
413+ username: Optional username for basic auth
414+ password: Optional password for basic auth
415+ bulk_delay: Delay in seconds between bulk indexing chunks; values <= 0 disable the delay (default: 1)
405416 """
406417es_client = create_elasticsearch_client (host ,api_key ,username ,password )
407418
@@ -418,17 +429,20 @@ def index_sarif_files(sarif_files, index_name, host, api_key=None, username=None
418429return False
419430
420431print (f"Indexing results from{ len (sarif_files )} SARIF files..." )
432+ if bulk_delay > 0 :
433+ print (f"Bulk delay:{ bulk_delay } second(s) between chunks" )
421434print ()
422435
423436try :
424437# Track progress during bulk indexing
425438documents_indexed = 0
426439last_progress_update = 0
427440progress_interval = 100 # Update every 100 documents
441+ chunks_processed = 0
428442
429443def progress_callback (success ,info ):
430444"""Callback to track progress during bulk indexing."""
431- nonlocal documents_indexed ,last_progress_update
445+ nonlocal documents_indexed ,last_progress_update , chunks_processed
432446documents_indexed += 1
433447
434448# Print progress updates periodically
@@ -449,12 +463,22 @@ def progress_callback(success, info):
449463raise_on_error = False ,
450464 ):
451465progress_callback (success ,info )
466+
467+ # streaming_bulk yields one result per document, so a completed chunk is inferred every 500 documents.
468+ # NOTE(review): 500 is hard-coded here; confirm it matches the chunk_size passed to streaming_bulk.
469+ if documents_indexed > 0 and documents_indexed % 500 == 0 :
470+ chunks_processed += 1
471+ if bulk_delay > 0 :
472+ print (f" → Sleeping{ bulk_delay } s after chunk{ chunks_processed } ..." )
473+ time .sleep (bulk_delay )
452474
453475print (f" → Indexed{ documents_indexed } documents (final)" )
454476print ()
455477print ("-" * 50 )
456478print (f"✓ Bulk indexing complete" )
457479print (f"✓ Total documents indexed:{ documents_indexed } " )
480+ if chunks_processed > 0 :
481+ print (f"✓ Total chunks processed:{ chunks_processed } " )
458482
459483# Get final index stats to verify
460484stats = es_client .indices .stats (index = index_name )
@@ -488,11 +512,13 @@ def main():
488512print (" ES_LOCAL_API_KEY - API key for authentication (optional)" )
489513print (" ES_LOCAL_USERNAME - Username for basic authentication (optional)" )
490514print (" ES_LOCAL_PASSWORD - Password for basic authentication (optional)" )
515+ print (" ES_BULK_DELAY - Delay in seconds between bulk chunks (default: 1)" )
491516print ()
492517print ("Example:" )
493518print (f" python{ sys .argv [0 ]} sarif-files.txt sarif_results_2024" )
494519print (" ES_LOCAL_URL=https://my-cluster.elastic.co:9243\\ " )
495520print (" ES_LOCAL_API_KEY=your_api_key\\ " )
521+ print (" ES_BULK_DELAY=1\\ " )
496522print (f" python{ sys .argv [0 ]} sarif-files.txt sarif_results_2024" )
497523sys .exit (1 )
498524
@@ -509,6 +535,7 @@ def main():
509535elastic_api_key = os .getenv ("ES_LOCAL_API_KEY" )
510536elastic_username = os .getenv ("ES_LOCAL_USERNAME" )
511537elastic_password = os .getenv ("ES_LOCAL_PASSWORD" )
538+ bulk_delay = float (os .getenv ("ES_BULK_DELAY" ,"1" ))
512539
513540# Handle variable substitution in ES_LOCAL_URL if needed
514541if elastic_host and "${ES_LOCAL_PORT}" in elastic_host :
@@ -538,6 +565,8 @@ def main():
538565print (f"Elasticsearch index:{ index_name } " )
539566print (f"Elasticsearch host:{ elastic_host } " )
540567print (f"Authentication:{ auth_method } " )
568+ if bulk_delay > 0 :
569+ print (f"Bulk delay:{ bulk_delay } second(s) between chunks" )
541570print ()
542571
543572# Read and validate SARIF files list
@@ -547,7 +576,7 @@ def main():
547576sys .exit (1 )
548577
549578# Index the files
550- if index_sarif_files (sarif_files ,index_name ,elastic_host ,elastic_api_key ,elastic_username ,elastic_password ):
579+ if index_sarif_files (sarif_files ,index_name ,elastic_host ,elastic_api_key ,elastic_username ,elastic_password , bulk_delay ):
551580print (f"\n ✓ Successfully created and populated index '{ index_name } '" )
552581print (f"You can now query the index using Elasticsearch APIs or Kibana." )
553582sys .exit (0 )