Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e2f6b44

Browse files
Add content filtering package
Co-authored-by: SamMorrowDrums <4811358+SamMorrowDrums@users.noreply.github.com>
1 parent 015b8b6, commit e2f6b44

File tree

2 files changed

+306
-0
lines changed

2 files changed

+306
-0
lines changed

‎pkg/filtering/content_filter.go

Lines changed: 133 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,133 @@
1+
package filtering
2+
3+
import (
4+
"regexp"
5+
"strings"
6+
)
7+
8+
// Precompiled patterns used by FilterContent. Every HTML pattern is
// case-insensitive because HTML tag and attribute names are.
var (
	// invisibleCharsRegex matches invisible Unicode characters: zero-width
	// spaces/joiners and directional marks (U+200B-U+200F), line/paragraph
	// separators and bidirectional embedding/override controls
	// (U+2028-U+202E), word joiner and invisible operators (U+2060-U+2064),
	// bidirectional isolate controls (U+2066-U+2069), and the byte order
	// mark (U+FEFF).
	invisibleCharsRegex = regexp.MustCompile(`[\x{200B}-\x{200F}\x{2028}-\x{202E}\x{2060}-\x{2064}\x{2066}-\x{2069}\x{FEFF}]`)

	// htmlCommentsRegex matches an HTML comment, including multi-line ones.
	htmlCommentsRegex = regexp.MustCompile(`<!--[\s\S]*?-->`)

	// Elements that could carry hidden content. Go's regexp has no
	// backreferences, so each tag gets its own open/close pattern.
	// NOTE(review): <link> and <embed> are void elements in HTML and usually
	// have no closing tag, in which case these two patterns do not match.
	htmlScriptRegex = regexp.MustCompile(`(?i)<script[^>]*>[\s\S]*?</script>`)
	htmlStyleRegex  = regexp.MustCompile(`(?i)<style[^>]*>[\s\S]*?</style>`)
	htmlIframeRegex = regexp.MustCompile(`(?i)<iframe[^>]*>[\s\S]*?</iframe>`)
	htmlObjectRegex = regexp.MustCompile(`(?i)<object[^>]*>[\s\S]*?</object>`)
	htmlEmbedRegex  = regexp.MustCompile(`(?i)<embed[^>]*>[\s\S]*?</embed>`)
	htmlSvgRegex    = regexp.MustCompile(`(?i)<svg[^>]*>[\s\S]*?</svg>`)
	htmlMathRegex   = regexp.MustCompile(`(?i)<math[^>]*>[\s\S]*?</math>`)
	htmlLinkRegex   = regexp.MustCompile(`(?i)<link[^>]*>[\s\S]*?</link>`)

	// htmlAttributesRegex matches a whole tag that carries at least one
	// style, data-*, hidden or class attribute with a quoted value (double
	// or single quotes).
	htmlAttributesRegex = regexp.MustCompile(`(?i)<[^>]*(?:style|data-[\w-]+|hidden|class)=(?:"[^"]*"|'[^']*')[^>]*>`)

	// collapsedSectionsRegex matches an attribute-less <details> section up
	// to the first closing </details>.
	collapsedSectionsRegex = regexp.MustCompile(`(?i)<details>[\s\S]*?</details>`)

	// smallTextRegex matches an element whose double-quoted style attribute
	// sets font-size to 0, a fraction below 1, or a single digit 0-3 in
	// px/pt/em/%, through the next closing tag of any name.
	smallTextRegex = regexp.MustCompile(`(?i)<[^>]*style="[^"]*font-size:\s*(?:0|0\.\d+|[0-3])(?:px|pt|em|%)[^"]*"[^>]*>[\s\S]*?</[^>]+>`)

	// excessiveWhitespaceRegex matches runs of four or more newlines.
	excessiveWhitespaceRegex = regexp.MustCompile(`\n{4,}`)
)
41+
42+
// Config holds configuration for content filtering.
type Config struct {
	// DisableContentFiltering disables all content filtering when true.
	DisableContentFiltering bool
}

// DefaultConfig returns a newly allocated Config with content filtering
// enabled (DisableContentFiltering is false).
func DefaultConfig() *Config {
	return &Config{
		DisableContentFiltering: false,
	}
}
54+
55+
// FilterContent filters potentially hidden content from the input text
56+
// This includes invisible Unicode characters, HTML comments, and other methods of hiding content
57+
funcFilterContent(inputstring,cfg*Config)string {
58+
ifcfg!=nil&&cfg.DisableContentFiltering {
59+
returninput
60+
}
61+
62+
ifinput=="" {
63+
returninput
64+
}
65+
66+
// Process the input text through each filter
67+
result:=input
68+
69+
// Remove invisible characters
70+
result=invisibleCharsRegex.ReplaceAllString(result,"")
71+
72+
// Replace HTML comments with a marker
73+
result=htmlCommentsRegex.ReplaceAllString(result,"[HTML_COMMENT]")
74+
75+
// Replace potentially dangerous HTML elements
76+
result=htmlScriptRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
77+
result=htmlStyleRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
78+
result=htmlIframeRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
79+
result=htmlObjectRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
80+
result=htmlEmbedRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
81+
result=htmlSvgRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
82+
result=htmlMathRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
83+
result=htmlLinkRegex.ReplaceAllString(result,"[HTML_ELEMENT]")
84+
85+
// Replace HTML attributes that might be used for hiding
86+
result=htmlAttributesRegex.ReplaceAllStringFunc(result,cleanHTMLAttributes)
87+
88+
// Replace collapsed sections with visible indicator
89+
result=collapsedSectionsRegex.ReplaceAllStringFunc(result,makeCollapsedSectionVisible)
90+
91+
// Replace very small text with visible indicator
92+
result=smallTextRegex.ReplaceAllString(result,"[SMALL_TEXT]")
93+
94+
// Normalize excessive whitespace
95+
result=excessiveWhitespaceRegex.ReplaceAllString(result,"\n\n\n")
96+
97+
returnresult
98+
}
99+
100+
// hideableAttributeRegex matches one style, data-*, hidden or class attribute
// with a double- or single-quoted value, together with the whitespace that
// precedes it. It is compiled once here instead of on every call.
var hideableAttributeRegex = regexp.MustCompile(`(?i)\s+(?:style|data-[\w-]+|hidden|class)=(?:"[^"]*"|'[^']*')`)

// cleanHTMLAttributes returns tag with every style, data-*, hidden and class
// attribute that has a quoted value removed. Attribute names are matched
// case-insensitively; all other attributes are left untouched.
//
// This is a simple textual implementation rather than an HTML parser. In
// particular, a bare boolean attribute without a value (e.g. `<p hidden>`)
// is not matched.
func cleanHTMLAttributes(tag string) string {
	return hideableAttributeRegex.ReplaceAllString(tag, "")
}
107+
108+
// Patterns used by makeCollapsedSectionVisible, compiled once at package
// scope instead of on every call.
var (
	// detailsSummaryRegex captures the body of the first <summary> element.
	// The s flag lets the body span multiple lines.
	detailsSummaryRegex = regexp.MustCompile(`(?is)<summary[^>]*>(.*?)</summary>`)

	// detailsOpenTagRegex matches a <details ...> tag at the very start.
	detailsOpenTagRegex = regexp.MustCompile(`(?i)^<details[^>]*>`)

	// detailsCloseTagRegex matches a </details> tag at the very end.
	detailsCloseTagRegex = regexp.MustCompile(`(?i)</details>$`)
)

// makeCollapsedSectionVisible rewrites a <details> section as always-visible
// text of the form "\n\n**<summary>:**\n<content>\n\n".
//
// The heading is the whitespace-trimmed text of the first <summary> element,
// or "Collapsed section" when there is no summary or it is blank. The content
// is everything after the first </summary> (or after the opening <details>
// tag when there is no summary) with one trailing </details> removed. Nested
// <details> markup inside the content is left as is.
func makeCollapsedSectionVisible(detailsSection string) string {
	summary := "Collapsed section"
	content := detailsSection

	if loc := detailsSummaryRegex.FindStringSubmatchIndex(detailsSection); loc != nil {
		// loc[2]:loc[3] is the captured summary text; loc[1] is the end of
		// the whole <summary>...</summary> match.
		if text := strings.TrimSpace(detailsSection[loc[2]:loc[3]]); text != "" {
			summary = text
		}
		content = detailsSection[loc[1]:]
	} else {
		// No summary element: drop the opening tag instead.
		content = detailsOpenTagRegex.ReplaceAllString(content, "")
	}
	content = detailsCloseTagRegex.ReplaceAllString(content, "")

	return "\n\n**" + summary + ":**\n" + content + "\n\n"
}

‎pkg/filtering/content_filter_test.go

Lines changed: 173 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,173 @@
1+
package filtering
2+
3+
import (
4+
"testing"
5+
)
6+
7+
funcTestFilterContent(t*testing.T) {
8+
tests:= []struct {
9+
namestring
10+
inputstring
11+
expectedstring
12+
cfg*Config
13+
}{
14+
{
15+
name:"Empty string",
16+
input:"",
17+
expected:"",
18+
cfg:DefaultConfig(),
19+
},
20+
{
21+
name:"Normal text without hidden content",
22+
input:"This is normal text without any hidden content.",
23+
expected:"This is normal text without any hidden content.",
24+
cfg:DefaultConfig(),
25+
},
26+
{
27+
name:"Text with invisible characters",
28+
input:"Hidden\u200Bcharacters\u200Bin\u200Bthis\u200Btext",
29+
expected:"Hiddencharactersinthistext",
30+
cfg:DefaultConfig(),
31+
},
32+
{
33+
name:"Text with HTML comments",
34+
input:"This has a <!-- hidden comment --> in it.",
35+
expected:"This has a [HTML_COMMENT] in it.",
36+
cfg:DefaultConfig(),
37+
},
38+
{
39+
name:"Text with HTML elements",
40+
input:"This has <script>alert('hidden')</script> scripts.",
41+
expected:"This has [HTML_ELEMENT] scripts.",
42+
cfg:DefaultConfig(),
43+
},
44+
{
45+
name:"Text with details/summary",
46+
input:"Collapsed content: <details><summary>Click me</summary>Hidden content</details>",
47+
expected:"Collapsed content:\n\n**Click me:**\nHidden content\n\n",
48+
cfg:DefaultConfig(),
49+
},
50+
{
51+
name:"Text with small font",
52+
input:"This has <span style=\"font-size:1px\">hidden tiny text</span> in it.",
53+
expected:"This has <span>hidden tiny text</span> in it.",
54+
cfg:DefaultConfig(),
55+
},
56+
{
57+
name:"Text with excessive whitespace",
58+
input:"Line 1\n\n\n\n\n\nLine 2",
59+
expected:"Line 1\n\n\nLine 2",
60+
cfg:DefaultConfig(),
61+
},
62+
{
63+
name:"Text with HTML attributes",
64+
input:"<p data-hidden=\"true\" style=\"display:none\">Hidden paragraph</p>",
65+
expected:"<p>Hidden paragraph</p>",
66+
cfg:DefaultConfig(),
67+
},
68+
{
69+
name:"Filtering disabled",
70+
input:"Hidden\u200Bcharacters and <!-- comments -->",
71+
expected:"Hidden\u200Bcharacters and <!-- comments -->",
72+
cfg:&Config{DisableContentFiltering:true},
73+
},
74+
{
75+
name:"Nil config uses default (filtering enabled)",
76+
input:"Hidden\u200Bcharacters",
77+
expected:"Hiddencharacters",
78+
cfg:nil,
79+
},
80+
{
81+
name:"Normal markdown with code blocks",
82+
input:"# Title\n\n```go\nfunc main() {\n fmt.Println(\"Hello, world!\")\n}\n```",
83+
expected:"# Title\n\n```go\nfunc main() {\n fmt.Println(\"Hello, world!\")\n}\n```",
84+
cfg:DefaultConfig(),
85+
},
86+
{
87+
name:"GitHub flavored markdown with tables",
88+
input:"| Header 1 | Header 2 |\n| -------- | -------- |\n| Cell 1 | Cell 2 |",
89+
expected:"| Header 1 | Header 2 |\n| -------- | -------- |\n| Cell 1 | Cell 2 |",
90+
cfg:DefaultConfig(),
91+
},
92+
}
93+
94+
for_,tc:=rangetests {
95+
t.Run(tc.name,func(t*testing.T) {
96+
result:=FilterContent(tc.input,tc.cfg)
97+
ifresult!=tc.expected {
98+
t.Errorf("FilterContent() = %q, want %q",result,tc.expected)
99+
}
100+
})
101+
}
102+
}
103+
104+
funcTestMakeCollapsedSectionVisible(t*testing.T) {
105+
tests:= []struct {
106+
namestring
107+
inputstring
108+
expectedstring
109+
}{
110+
{
111+
name:"Simple details/summary",
112+
input:"<details><summary>Click me</summary>Hidden content</details>",
113+
expected:"\n\n**Click me:**\nHidden content\n\n",
114+
},
115+
{
116+
name:"Details without summary",
117+
input:"<details>Hidden content</details>",
118+
expected:"\n\n**Collapsed section:**\nHidden content\n\n",
119+
},
120+
{
121+
name:"Nested content",
122+
input:"<details><summary>Outer</summary>Content<details><summary>Inner</summary>Nested</details></details>",
123+
expected:"\n\n**Outer:**\nContent<details><summary>Inner</summary>Nested</details>\n\n",
124+
},
125+
}
126+
127+
for_,tc:=rangetests {
128+
t.Run(tc.name,func(t*testing.T) {
129+
result:=makeCollapsedSectionVisible(tc.input)
130+
ifresult!=tc.expected {
131+
t.Errorf("makeCollapsedSectionVisible() = %q, want %q",result,tc.expected)
132+
}
133+
})
134+
}
135+
}
136+
137+
funcTestCleanHTMLAttributes(t*testing.T) {
138+
tests:= []struct {
139+
namestring
140+
inputstring
141+
expectedstring
142+
}{
143+
{
144+
name:"Tag with style attribute",
145+
input:"<p style=\"display:none\">Hidden</p>",
146+
expected:"<p>Hidden</p>",
147+
},
148+
{
149+
name:"Tag with data attribute",
150+
input:"<p data-hidden=\"true\">Hidden</p>",
151+
expected:"<p>Hidden</p>",
152+
},
153+
{
154+
name:"Tag with multiple attributes",
155+
input:"<p id=\"para\" style=\"display:none\" data-test=\"value\">Hidden</p>",
156+
expected:"<p id=\"para\">Hidden</p>",
157+
},
158+
{
159+
name:"Tag with allowed attributes",
160+
input:"<a href=\"https://example.com\" target=\"_blank\">Link</a>",
161+
expected:"<a href=\"https://example.com\" target=\"_blank\">Link</a>",
162+
},
163+
}
164+
165+
for_,tc:=rangetests {
166+
t.Run(tc.name,func(t*testing.T) {
167+
result:=cleanHTMLAttributes(tc.input)
168+
ifresult!=tc.expected {
169+
t.Errorf("cleanHTMLAttributes() = %q, want %q",result,tc.expected)
170+
}
171+
})
172+
}
173+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp