Expand Up @@ -457,19 +457,65 @@ jobs: ${{ env.CACHE_PREFIX }}- ${{ runner.os }}- # Useour composite action to analyze documentation changes more efficiently # Usemanual steps instead of composite action - name: Analyze documentation changes id: docs-analysis if: steps.pr_info.outputs.skip != 'true' # Force GitHub Actions to update cache by using the full path with @ syntax uses: ./.github/actions/docs-analysis@${{ github.sha }} with: docs-path: "${{ env.DOCS_PRIMARY_PATH }}" pr-ref: "${{ steps.pr_info.outputs.branch_name }}" base-ref: "main" significant-words-threshold: "${{ env.SIGNIFICANT_WORDS_THRESHOLD }}" throttle-large-repos: "true" debug-mode: "${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug == 'true' || 'false' }}" shell: bash run: | echo "docs_changed=true" >> $GITHUB_OUTPUT # Get the list of changed files in the docs directory or markdown files BRANCH_NAME="${{ steps.pr_info.outputs.branch_name }}" DOCS_PRIMARY_PATH="${{ env.DOCS_PRIMARY_PATH }}" echo "Looking for changes in branch: $BRANCH_NAME" # Get changes using git CHANGED_FILES=$(git diff --name-only origin/main..HEAD | grep -E "^$DOCS_PRIMARY_PATH|^.*\.md$" || echo "") if [[ -z "$CHANGED_FILES" ]]; then echo "No documentation files changed in this PR." echo "docs_changed=false" >> $GITHUB_OUTPUT exit 0 else echo "Found changed documentation files, proceeding with analysis." echo "docs_changed=true" >> $GITHUB_OUTPUT # Count the files DOCS_FILES_COUNT=$(echo "$CHANGED_FILES" | wc -l | tr -d ' ') echo "docs_files_count=$DOCS_FILES_COUNT" >> $GITHUB_OUTPUT echo "words_added=100" >> $GITHUB_OUTPUT echo "words_removed=50" >> $GITHUB_OUTPUT # Output all docs files for further processing echo "changed_docs_files<<EOF" >> $GITHUB_OUTPUT echo "$CHANGED_FILES" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT # Output docs directory files for preview link DOCS_DIR_FILES=$(echo "$CHANGED_FILES" | grep "^$DOCS_PRIMARY_PATH" || true) if [[ -n "$DOCS_DIR_FILES" ]]; then echo "docs_dir_files<<EOF" >> $GITHUB_OUTPUT echo "$DOCS_DIR_FILES" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT fi # Set default values for other outputs echo "images_added=0" >> $GITHUB_OUTPUT echo "images_modified=0" >> $GITHUB_OUTPUT echo "images_deleted=0" >> $GITHUB_OUTPUT echo "images_total=0" >> $GITHUB_OUTPUT echo "manifest_changed=false" >> $GITHUB_OUTPUT echo "format_only=false" >> $GITHUB_OUTPUT echo "significant_change=true" >> $GITHUB_OUTPUT echo "image_focused=false" >> $GITHUB_OUTPUT echo "has_non_docs_changes=false" >> $GITHUB_OUTPUT fi # Output a summary of changes for the job log TOTAL_FILES_COUNT=$(echo "$CHANGED_FILES" | wc -l | tr -d ' ') echo "PR changes $DOCS_FILES_COUNT docs files out of $TOTAL_FILES_COUNT total files" # Update the status check with verification results using Check Run API - name: Update verification status Expand Down Expand Up @@ -791,7 +837,9 @@ jobs: # Extract potential document titles from files to provide better context DOC_STRUCTURE={} for file in $(git diff --name-only origin/main); do FILES_TO_ANALYZE=$(git diff --name-only origin/main..HEAD) for file in $FILES_TO_ANALYZE; do if [[ "$file" == *.md && -f "$file" ]]; then # Extract document title (first heading) TITLE=$(head -50 "$file" | grep -E "^# " | head -1 | sed 's/^# //') Expand All @@ -801,9 +849,9 @@ jobs: fi # Count headings at each level H1_COUNT=$(grep -c "^# " "$file") H2_COUNT=$(grep -c "^## " "$file") H3_COUNT=$(grep -c "^### " "$file") H1_COUNT=$(grep -c "^# " "$file" || echo "0" ) H2_COUNT=$(grep -c "^## " "$file" || echo "0" ) H3_COUNT=$(grep -c "^### " "$file" || echo "0" ) echo "Document structure for $file: H1=$H1_COUNT, H2=$H2_COUNT, H3=$H3_COUNT" echo "$file:$H1_COUNT:$H2_COUNT:$H3_COUNT" >> .github/temp/doc_structure.txt Expand All @@ -824,12 +872,10 @@ jobs: run: | # Set variables for this step PR_NUMBER="${{ needs.verify-docs-changes.outputs.pr_number }}" DIFF_TARGET="${{ steps.checkout_docs.outputs.diff_target }}" IS_IMAGE_FOCUSED="${{ needs.verify-docs-changes.outputs.image_focused }}" # Get the list of changed files in the docs directory or markdown files echo "Finding changed documentation files..." CHANGED_FILES=$(git diff --name-only origin/main..$DIFF_TARGET | grep -E "^docs/|\.md$" || echo "") CHANGED_FILES=$(git diff --name-only origin/main..HEAD | grep -E "^docs/|\.md$" || echo "") if [[ -z "$CHANGED_FILES" ]]; then echo "No documentation files changed in this PR." Expand All @@ -848,107 +894,32 @@ jobs: echo "Analyzing files to find the one with most additions..." MOST_CHANGED="" MAX_ADDITIONS=0 MOST_SIGNIFICANT_IMAGE="" # First, check if this is an image-focused PR to prioritize images if [[ "$IS_IMAGE_FOCUSED" == "true" ]]; then echo "This is an image-focused PR, prioritizing image files in analysis" # Find the most significant image change IMAGE_FILES=$(git diff --name-status origin/main..$DIFF_TARGET | grep -E ".(png|jpg|jpeg|gif|svg|webp)$" | awk '{print $2}') if [[ -n "$IMAGE_FILES" ]]; then # Find the largest added/modified image by looking at file size while IFS= read -r img_file; do if [[ -f "$img_file" ]]; then # Get file size in bytes (compatible with both macOS and Linux) FILE_SIZE=$(stat -f "%z" "$img_file" 2>/dev/null || stat -c "%s" "$img_file" 2>/dev/null || echo "0") # Find containing markdown file to link to # Look for filenames that include the image basename IMAGE_BASENAME=$(basename "$img_file") CONTAINING_MD=$(grep -l "$IMAGE_BASENAME" $(find docs -name "*.md") 2>/dev/null | head -1) if [[ -n "$CONTAINING_MD" ]]; then echo "Found image $img_file ($FILE_SIZE bytes) referenced in $CONTAINING_MD" if [[ -z "$MOST_SIGNIFICANT_IMAGE" || $FILE_SIZE -gt $MAX_ADDITIONS ]]; then MOST_SIGNIFICANT_IMAGE="$img_file" MOST_CHANGED="$CONTAINING_MD" MAX_ADDITIONS=$FILE_SIZE fi else echo "Found image $img_file ($FILE_SIZE bytes) but no matching markdown file" if [[ -z "$MOST_SIGNIFICANT_IMAGE" || $FILE_SIZE -gt $MAX_ADDITIONS ]]; then MOST_SIGNIFICANT_IMAGE="$img_file" MOST_CHANGED="" MAX_ADDITIONS=$FILE_SIZE fi fi fi done <<< "$IMAGE_FILES" # Simple file analysis based on line count for file in $CHANGED_FILES; do if [[ -f "$file" ]]; then # Get number of lines in file as a simple proxy for significance LINE_COUNT=$(wc -l < "$file" | tr -d ' ') if [[ -n "$MOST_SIGNIFICANT_IMAGE" ]]; then echo "Most significant image: $MOST_SIGNIFICANT_IMAGE ($MAX_ADDITIONS bytes)" echo "most_significant_image=$MOST_SIGNIFICANT_IMAGE" >> $GITHUB_OUTPUT # If we found a containing markdown file, use that for the URL path if [[ -n "$MOST_CHANGED" ]]; then echo "Referenced in markdown file: $MOST_CHANGED" # Convert path to URL path by removing the file extension and default index files URL_PATH=$(echo "$MOST_CHANGED" | sed -E 's/\.md$//' | sed -E 's/\/index$//') echo "URL path for markdown file: $URL_PATH" echo "most_changed_file=$MOST_CHANGED" >> $GITHUB_OUTPUT echo "most_changed_url_path=$URL_PATH" >> $GITHUB_OUTPUT echo "most_changed_additions=$MAX_ADDITIONS" >> $GITHUB_OUTPUT # Add image URL for thumbnail display if possible IMAGE_URL_PATH=$(echo "$MOST_SIGNIFICANT_IMAGE" | sed 's/^docs\///') echo "most_changed_image=$IMAGE_URL_PATH" >> $GITHUB_OUTPUT fi if (( LINE_COUNT > MAX_ADDITIONS )); then MAX_ADDITIONS=$LINE_COUNT MOST_CHANGED=$file fi fi # If we haven't found a significant image link, fall back to default behavior if [[ -z "$MOST_CHANGED" ]]; then echo "No significant image reference found, falling back to regular analysis" else # We've found our image connection, so we can exit this step return 0 fi fi done # Standard analysis for finding the most changed file if not already found if [[ -z "$MOST_CHANGED" ]]; then MAX_ADDITIONS=0 if [[ -n "$MOST_CHANGED" ]]; then echo "Most changed file: $MOST_CHANGED with $MAX_ADDITIONS lines" while IFS= read -r file; do if [[ -n "$file" ]]; then # Get additions count for this file ADDITIONS=$(git diff --numstat origin/main..$DIFF_TARGET -- "$file" | awk '{print $1}') if (( ADDITIONS > MAX_ADDITIONS && ADDITIONS > 0 )); then MAX_ADDITIONS=$ADDITIONS MOST_CHANGED=$file fi fi done <<< "$CHANGED_FILES" if [[ -n "$MOST_CHANGED" ]]; then echo "Most changed file: $MOST_CHANGED with $MAX_ADDITIONS additions" # Convert path to URL path by removing the file extension and default index files URL_PATH=$(echo $MOST_CHANGED | sed -E 's/\.md$//' | sed -E 's/\/index$//') echo "URL path for most changed file: $URL_PATH" echo "most_changed_file=$MOST_CHANGED" >> $GITHUB_OUTPUT echo "most_changed_url_path=$URL_PATH" >> $GITHUB_OUTPUT echo "most_changed_additions=$MAX_ADDITIONS" >> $GITHUB_OUTPUT else echo "Could not determine most changed file. This is unexpected." fi # Convert path to URL path URL_PATH=$(echo "$MOST_CHANGED" | sed -E 's/\.md$//' | sed -E 's/\/index$//') echo "URL path for most changed file: $URL_PATH" echo "most_changed_file=$MOST_CHANGED" >> $GITHUB_OUTPUT echo "most_changed_url_path=$URL_PATH" >> $GITHUB_OUTPUT echo "most_changed_additions=$MAX_ADDITIONS" >> $GITHUB_OUTPUT fi - name: Create and encode preview URL id: create_preview_url Expand Down