Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

[WIP] Invisible character filtering#426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Draft
Copilot wants to merge 5 commits into main
base:main
Choose a base branch
Loading
fromcopilot/fix-415
Draft
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletionsREADME.md
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -219,6 +219,41 @@ docker run -i --rm \
ghcr.io/github/github-mcp-server
```

## Content Filtering

The GitHub MCP Server includes a content filtering feature that removes invisible characters and hidden content from GitHub issues, PRs, and comments. This helps prevent potential security risks and ensures better readability of content.

### What Gets Filtered

- **Invisible Unicode Characters**: Zero-width spaces, zero-width joiners, zero-width non-joiners, bidirectional marks, and other invisible Unicode characters
- **HTML Comments**: Comments that might contain hidden information
- **Hidden HTML Elements**: Script, style, iframe, and other potentially dangerous HTML elements
- **Collapsed Sections**: Details/summary elements that might hide content
- **Very Small Text**: Content with extremely small font size

### Controlling Content Filtering

Content filtering is enabled by default. You can disable it using the `--disable-content-filtering` flag:

```bash
github-mcp-server --disable-content-filtering
```

Or using the environment variable:

```bash
GITHUB_DISABLE_CONTENT_FILTERING=1 github-mcp-server
```

When using Docker, you can set the environment variable:

```bash
docker run -i --rm \
-e GITHUB_PERSONAL_ACCESS_TOKEN=<your-token> \
-e GITHUB_DISABLE_CONTENT_FILTERING=1 \
ghcr.io/github/github-mcp-server
```

## GitHub Enterprise Server

The flag `--gh-host` and the environment variable `GITHUB_HOST` can be used to set
Expand Down
21 changes: 12 additions & 9 deletionscmd/github-mcp-server/main.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -44,15 +44,16 @@ var (
}

stdioServerConfig := ghmcp.StdioServerConfig{
Version: version,
Host: viper.GetString("host"),
Token: token,
EnabledToolsets: enabledToolsets,
DynamicToolsets: viper.GetBool("dynamic_toolsets"),
ReadOnly: viper.GetBool("read-only"),
ExportTranslations: viper.GetBool("export-translations"),
EnableCommandLogging: viper.GetBool("enable-command-logging"),
LogFilePath: viper.GetString("log-file"),
Version: version,
Host: viper.GetString("host"),
Token: token,
EnabledToolsets: enabledToolsets,
DynamicToolsets: viper.GetBool("dynamic_toolsets"),
ReadOnly: viper.GetBool("read-only"),
DisableContentFiltering: viper.GetBool("disable-content-filtering"),
ExportTranslations: viper.GetBool("export-translations"),
EnableCommandLogging: viper.GetBool("enable-command-logging"),
LogFilePath: viper.GetString("log-file"),
}

return ghmcp.RunStdioServer(stdioServerConfig)
Expand All@@ -73,6 +74,7 @@ func init() {
rootCmd.PersistentFlags().Bool("enable-command-logging", false, "When enabled, the server will log all command requests and responses to the log file")
rootCmd.PersistentFlags().Bool("export-translations", false, "Save translations to a JSON file")
rootCmd.PersistentFlags().String("gh-host", "", "Specify the GitHub hostname (for GitHub Enterprise etc.)")
rootCmd.PersistentFlags().Bool("disable-content-filtering", false, "Disable filtering of invisible characters and hidden content from GitHub issues, PRs, and comments")

// Bind flag to viper
_ = viper.BindPFlag("toolsets", rootCmd.PersistentFlags().Lookup("toolsets"))
Expand All@@ -82,6 +84,7 @@ func init() {
_ = viper.BindPFlag("enable-command-logging", rootCmd.PersistentFlags().Lookup("enable-command-logging"))
_ = viper.BindPFlag("export-translations", rootCmd.PersistentFlags().Lookup("export-translations"))
_ = viper.BindPFlag("host", rootCmd.PersistentFlags().Lookup("gh-host"))
_ = viper.BindPFlag("disable-content-filtering", rootCmd.PersistentFlags().Lookup("disable-content-filtering"))

// Add subcommands
rootCmd.AddCommand(stdioCmd)
Expand Down
26 changes: 18 additions & 8 deletionsinternal/ghmcp/server.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -43,6 +43,9 @@ type MCPServerConfig struct {
// ReadOnly indicates if we should only offer read-only tools
ReadOnly bool

// DisableContentFiltering disables filtering of invisible characters and hidden content
DisableContentFiltering bool

// Translator provides translated text for the server tooling
Translator translations.TranslationHelperFunc
}
Expand DownExpand Up@@ -91,7 +94,10 @@ func NewMCPServer(cfg MCPServerConfig) (*server.MCPServer, error) {
OnBeforeInitialize: []server.OnBeforeInitializeFunc{beforeInit},
}

ghServer := github.NewServer(cfg.Version, server.WithHooks(hooks))
ghServer := github.NewServerWithConfig(github.ServerConfig{
Version: cfg.Version,
DisableContentFiltering: cfg.DisableContentFiltering,
}, server.WithHooks(hooks))

enabledToolsets := cfg.EnabledToolsets
if cfg.DynamicToolsets {
Expand DownExpand Up@@ -160,6 +166,9 @@ type StdioServerConfig struct {
// ReadOnly indicates if we should only register read-only tools
ReadOnly bool

// DisableContentFiltering disables filtering of invisible characters and hidden content
DisableContentFiltering bool

// ExportTranslations indicates if we should export translations
// See: https://github.com/github/github-mcp-server?tab=readme-ov-file#i18n--overriding-descriptions
ExportTranslations bool
Expand All@@ -180,13 +189,14 @@ func RunStdioServer(cfg StdioServerConfig) error {
t, dumpTranslations := translations.TranslationHelper()

ghServer, err := NewMCPServer(MCPServerConfig{
Version: cfg.Version,
Host: cfg.Host,
Token: cfg.Token,
EnabledToolsets: cfg.EnabledToolsets,
DynamicToolsets: cfg.DynamicToolsets,
ReadOnly: cfg.ReadOnly,
Translator: t,
Version: cfg.Version,
Host: cfg.Host,
Token: cfg.Token,
EnabledToolsets: cfg.EnabledToolsets,
DynamicToolsets: cfg.DynamicToolsets,
ReadOnly: cfg.ReadOnly,
DisableContentFiltering: cfg.DisableContentFiltering,
Translator: t,
})
if err != nil {
return fmt.Errorf("failed to create MCP server: %w", err)
Expand Down
145 changes: 145 additions & 0 deletionspkg/filtering/content_filter.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
package filtering

import (
"regexp"
"strings"
)

// Package-level patterns used by FilterContent. Each one is compiled once at
// package initialisation. None of the patterns sets the case-insensitive flag,
// so tag and attribute names are matched exactly as written (lowercase).
var (
// Invisible Unicode characters
// This includes zero-width spaces, zero-width joiners, zero-width non-joiners,
// bidirectional marks, and other invisible unicode characters
//
// Ranges covered:
//   U+200B-U+200F  zero-width space/non-joiner/joiner, LRM, RLM
//   U+2028-U+202E  line/paragraph separators and bidi embedding/override controls
//   U+2060-U+2064  word joiner and invisible math operators
//   U+FEFF         zero-width no-break space (byte order mark)
// NOTE(review): the bidi isolate controls U+2066-U+2069 are not in any of
// these ranges - confirm whether that omission is intentional.
invisibleCharsRegex = regexp.MustCompile(`[\x{200B}-\x{200F}\x{2028}-\x{202E}\x{2060}-\x{2064}\x{FEFF}]`)

// HTML comments
// Non-greedy, and [\s\S] lets the match span multiple lines.
htmlCommentsRegex = regexp.MustCompile(`<!--[\s\S]*?-->`)

// HTML elements that could contain hidden content
// This is a simple approach that targets specific dangerous tags
// Go's regexp doesn't support backreferences, so we list each tag explicitly
// Each pattern matches an opening tag (with optional attributes), the shortest
// possible body, and the matching closing tag. An element without a closing
// tag is therefore not matched by these patterns.
htmlScriptRegex = regexp.MustCompile(`<script[^>]*>[\s\S]*?</script>`)
htmlStyleRegex = regexp.MustCompile(`<style[^>]*>[\s\S]*?</style>`)
htmlIframeRegex = regexp.MustCompile(`<iframe[^>]*>[\s\S]*?</iframe>`)
htmlObjectRegex = regexp.MustCompile(`<object[^>]*>[\s\S]*?</object>`)
htmlEmbedRegex = regexp.MustCompile(`<embed[^>]*>[\s\S]*?</embed>`)
htmlSvgRegex = regexp.MustCompile(`<svg[^>]*>[\s\S]*?</svg>`)
htmlMathRegex = regexp.MustCompile(`<math[^>]*>[\s\S]*?</math>`)
htmlLinkRegex = regexp.MustCompile(`<link[^>]*>[\s\S]*?</link>`)

// HTML attributes that might be used for hiding content
// Matches a whole tag that carries at least one double-quoted style, data-*,
// hidden or class attribute. Unquoted, single-quoted and valueless forms
// (for example a bare `hidden`) do not match.
htmlAttributesRegex = regexp.MustCompile(`<[^>]*(?:style|data-[\w-]+|hidden|class)="[^"]*"[^>]*>`)

// Detect collapsed sections (details/summary)
// Only a bare `<details>` opening tag matches; one with attributes does not.
collapsedSectionsRegex = regexp.MustCompile(`<details>[\s\S]*?</details>`)

// Very small text (font-size or similar CSS tricks)
// Matches a tag whose double-quoted style sets font-size to 0, 0.x, or a
// single digit 0-3 followed by px, pt, em or %, through the next closing tag.
// NOTE(review): values such as `1em` also match this pattern although they
// are not necessarily small - confirm whether em/% should use the same bound.
smallTextRegex = regexp.MustCompile(`<[^>]*style="[^"]*font-size:\s*(?:0|0\.\d+|[0-3])(?:px|pt|em|%)[^"]*"[^>]*>[\s\S]*?</[^>]+>`)

// Excessive whitespace (more than 3 consecutive newlines)
excessiveWhitespaceRegex = regexp.MustCompile(`\n{4,}`)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

@copilot perhaps also add a filter for excessive spaces and tabs too. Probably for spaces 15+ and for tabs 6+

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

I've added filters for excessive spaces and tabs as requested. The implementation filters sequences of 15+ consecutive spaces and 6+ consecutive tabs, replacing them with normalized versions (14 spaces and 5 tabs respectively). Changes are in commit2e27e2a.


// Excessive spaces (15 or more consecutive spaces)
excessiveSpacesRegex = regexp.MustCompile(` {15,}`)

// Excessive tabs (6 or more consecutive tabs)
excessiveTabsRegex = regexp.MustCompile(`\t{6,}`)
)

// Config carries the settings that control how FilterContent behaves.
type Config struct {
	// DisableContentFiltering turns every filter off when it is true, so
	// input is passed through unchanged.
	DisableContentFiltering bool
}

// DefaultConfig builds a new Config in which content filtering is left
// enabled (DisableContentFiltering is false).
func DefaultConfig() *Config {
	cfg := new(Config)
	cfg.DisableContentFiltering = false
	return cfg
}

// Replacement runs used when collapsing excessive horizontal whitespace.
// They are built with strings.Repeat so the exact lengths are explicit and
// cannot be lost to whitespace mangling of the source.
var (
	// cappedSpaces is 14 spaces, one below the 15-space detection threshold.
	cappedSpaces = strings.Repeat(" ", 14)
	// cappedTabs is 5 tabs, one below the 6-tab detection threshold.
	cappedTabs = strings.Repeat("\t", 5)
)

// FilterContent filters potentially hidden content from the input text.
// This includes invisible Unicode characters, HTML comments, and other
// methods of hiding content.
//
// The input is returned unchanged when cfg is non-nil and has
// DisableContentFiltering set, or when input is empty. A nil cfg means
// filtering is applied.
//
// Filters run in a fixed order, each one operating on the output of the
// previous one:
//  1. invisible characters are deleted;
//  2. HTML comments become "[HTML_COMMENT]";
//  3. script/style/iframe/object/embed/svg/math/link elements become
//     "[HTML_ELEMENT]";
//  4. hiding-related attributes are stripped from remaining tags;
//  5. <details> sections are rewritten as visible text;
//  6. very small text becomes "[SMALL_TEXT]";
//  7. runs of 4+ newlines, 15+ spaces and 6+ tabs are shortened to
//     3 newlines, 14 spaces and 5 tabs respectively.
func FilterContent(input string, cfg *Config) string {
	if cfg != nil && cfg.DisableContentFiltering {
		return input
	}

	if input == "" {
		return input
	}

	// Process the input text through each filter.
	result := input

	// Remove invisible characters.
	result = invisibleCharsRegex.ReplaceAllString(result, "")

	// Replace HTML comments with a marker.
	result = htmlCommentsRegex.ReplaceAllString(result, "[HTML_COMMENT]")

	// Replace potentially dangerous HTML elements.
	result = htmlScriptRegex.ReplaceAllString(result, "[HTML_ELEMENT]")
	result = htmlStyleRegex.ReplaceAllString(result, "[HTML_ELEMENT]")
	result = htmlIframeRegex.ReplaceAllString(result, "[HTML_ELEMENT]")
	result = htmlObjectRegex.ReplaceAllString(result, "[HTML_ELEMENT]")
	result = htmlEmbedRegex.ReplaceAllString(result, "[HTML_ELEMENT]")
	result = htmlSvgRegex.ReplaceAllString(result, "[HTML_ELEMENT]")
	result = htmlMathRegex.ReplaceAllString(result, "[HTML_ELEMENT]")
	result = htmlLinkRegex.ReplaceAllString(result, "[HTML_ELEMENT]")

	// Strip attributes that might be used for hiding content.
	result = htmlAttributesRegex.ReplaceAllStringFunc(result, cleanHTMLAttributes)

	// Replace collapsed sections with a visible indicator.
	result = collapsedSectionsRegex.ReplaceAllStringFunc(result, makeCollapsedSectionVisible)

	// Replace very small text with a visible indicator.
	result = smallTextRegex.ReplaceAllString(result, "[SMALL_TEXT]")

	// Normalize excessive blank lines.
	result = excessiveWhitespaceRegex.ReplaceAllString(result, "\n\n\n")

	// Normalize excessive spaces to the longest run that is still allowed.
	result = excessiveSpacesRegex.ReplaceAllString(result, cappedSpaces)

	// Normalize excessive tabs; the replacement stays tabs rather than
	// turning indentation into a space.
	result = excessiveTabsRegex.ReplaceAllString(result, cappedTabs)

	return result
}

// hidingAttributesRegex matches a single double-quoted style, data-*, hidden
// or class attribute together with the whitespace that precedes it. It is
// compiled once here instead of on every cleanHTMLAttributes call.
var hidingAttributesRegex = regexp.MustCompile(`\s+(?:style|data-[\w-]+|hidden|class)="[^"]*"`)

// cleanHTMLAttributes removes potentially dangerous attributes from an HTML
// tag and returns the tag with everything else left in place.
//
// This is a simple implementation that removes double-quoted style, data-*,
// hidden and class attributes. Valueless or differently quoted forms are left
// untouched. A more sophisticated implementation would parse the HTML and
// selectively remove attributes.
func cleanHTMLAttributes(tag string) string {
	return hidingAttributesRegex.ReplaceAllString(tag, "")
}

// detailsSummaryRegex captures the text between the first <summary> and the
// first </summary> that follows it. The (?s) flag lets the capture span
// newlines, so a multi-line summary is kept instead of being dropped.
var detailsSummaryRegex = regexp.MustCompile(`(?s)<summary>(.*?)</summary>`)

// makeCollapsedSectionVisible transforms a <details> section into plain,
// visible text.
//
// detailsSection is expected to be a complete "<details>...</details>" span.
// The result has the form "\n\n**<summary>:**\n<content>\n\n", where
// <summary> is the text of the <summary> element, or "Collapsed section" when
// none is found, and <content> is everything after the first "</summary>" (or
// after the opening "<details>" when there is no summary) with the trailing
// "</details>" removed.
func makeCollapsedSectionVisible(detailsSection string) string {
	// Extract the summary if present.
	summary := "Collapsed section"
	if m := detailsSummaryRegex.FindStringSubmatch(detailsSection); len(m) > 1 {
		summary = m[1]
	}

	// Extract the content: everything after </summary>, or after the opening
	// <details> tag when the section has no summary.
	const summaryClose = "</summary>"
	var content string
	if idx := strings.Index(detailsSection, summaryClose); idx >= 0 {
		content = detailsSection[idx+len(summaryClose):]
	} else {
		content = strings.TrimPrefix(detailsSection, "<details>")
	}
	content = strings.TrimSuffix(content, "</details>")

	// Format as a visible section.
	return "\n\n**" + summary + ":**\n" + content + "\n\n"
}
Loading

[8]ページ先頭

©2009-2025 Movatter.jp