Skip to content
Snippets Groups Projects
Commit bd663b43 authored by Frédéric Guillot
Browse files

Improve HTML sanitizer

parent 1f015d5d
No related branches found
No related tags found
No related merge requests found
...@@ -148,7 +148,7 @@ func TestParsePodcast(t *testing.T) { ...@@ -148,7 +148,7 @@ func TestParsePodcast(t *testing.T) {
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title) t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
} }
if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` { if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged &amp; Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content) t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
} }
......
...@@ -94,7 +94,7 @@ func TestParseRss2Sample(t *testing.T) { ...@@ -94,7 +94,7 @@ func TestParseRss2Sample(t *testing.T) {
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
} }
if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` { if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia&#39;s <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
} }
} }
......
...@@ -7,10 +7,11 @@ package sanitizer ...@@ -7,10 +7,11 @@ package sanitizer
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"github.com/miniflux/miniflux2/reader/url"
"io" "io"
"strings" "strings"
"github.com/miniflux/miniflux2/reader/url"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
...@@ -33,7 +34,7 @@ func Sanitize(baseURL, input string) string { ...@@ -33,7 +34,7 @@ func Sanitize(baseURL, input string) string {
token := tokenizer.Token() token := tokenizer.Token()
switch token.Type { switch token.Type {
case html.TextToken: case html.TextToken:
buffer.WriteString(token.Data) buffer.WriteString(html.EscapeString(token.Data))
case html.StartTagToken: case html.StartTagToken:
tagName := token.DataAtom.String() tagName := token.DataAtom.String()
...@@ -72,8 +73,8 @@ func Sanitize(baseURL, input string) string { ...@@ -72,8 +73,8 @@ func Sanitize(baseURL, input string) string {
} }
} }
func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (attrNames []string, html string) { func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([]string, string) {
var htmlAttrs []string var htmlAttrs, attrNames []string
var err error var err error
for _, attribute := range attributes { for _, attribute := range attributes {
...@@ -99,7 +100,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (a ...@@ -99,7 +100,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (a
} }
attrNames = append(attrNames, attribute.Key) attrNames = append(attrNames, attribute.Key)
htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, value)) htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)))
} }
extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName) extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
......
...@@ -142,3 +142,23 @@ func TestPixelTracker(t *testing.T) { ...@@ -142,3 +142,23 @@ func TestPixelTracker(t *testing.T) {
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
} }
} }
// TestXmlEntities checks how Sanitize renders text content inside an allowed
// tag: the literal double quotes are expected to come back as the numeric
// entity &#34;, while the already-encoded &gt; must still read &gt; in the
// output (i.e. it is neither dropped nor double-escaped).
func TestXmlEntities(t *testing.T) {
	const (
		source = `<pre>echo "test" &gt; /etc/hosts</pre>`
		want   = `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
	)

	if got := Sanitize("http://example.org/", source); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
// TestEspaceAttributes checks that markup placed inside an attribute value is
// emitted in escaped form (&lt; / &gt;) rather than as raw tags, so the
// attribute value cannot inject elements into the sanitized output.
func TestEspaceAttributes(t *testing.T) {
	const (
		source = `<td rowspan="<b>test</b>">test</td>`
		want   = `<td rowspan="&lt;b&gt;test&lt;/b&gt;">test</td>`
	)

	if got := Sanitize("http://example.org/", source); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment