From 6ea4da3bce2cc9321926be74d0cbc6ad0f01f363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= <fred@miniflux.net> Date: Sun, 18 Mar 2018 11:57:46 -0700 Subject: [PATCH] Handle RSS author elements with inner HTML --- reader/rss/parser_test.go | 25 +++++++++++++++++++++++++ reader/rss/rss.go | 8 +++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index 7bf9f755..6d9015d2 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -230,6 +230,31 @@ func TestParseFeedURLWithAtomLink(t *testing.T) { } } +func TestParseEntryWithAuthorAndInnerHTML(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0"> + <channel> + <title>Example</title> + <link>https://example.org/</link> + <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link> + <item> + <title>Test</title> + <link>https://example.org/item</link> + <author>by <a itemprop="url" class="author" rel="author" href="/author/foobar">Foo Bar</a></author> + </item> + </channel> + </rss>` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Author != "by Foo Bar" { + t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) + } +} + func TestParseEntryWithAtomAuthor(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0"> diff --git a/reader/rss/rss.go b/reader/rss/rss.go index 041cd198..1cd82bae 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -15,6 +15,7 @@ import ( "github.com/miniflux/miniflux/logger" "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/date" + "github.com/miniflux/miniflux/reader/sanitizer" "github.com/miniflux/miniflux/url" ) @@ -56,6 +57,7 @@ type rssAuthor struct { XMLName xml.Name Data string `xml:",chardata"` Name string `xml:"name"` + Inner string `xml:",innerxml"` } type rssEnclosure struct { @@ -100,7 +102,7 @@ func (r *rssFeed) Transform() *model.Feed { if entry.Author == "" && r.ItunesAuthor != "" { entry.Author = r.ItunesAuthor } - entry.Author = strings.TrimSpace(entry.Author) + entry.Author = strings.TrimSpace(sanitizer.StripTags(entry.Author)) if entry.URL == "" { entry.URL = feed.SiteURL @@ -146,8 +148,8 @@ func (r *rssItem) GetAuthor() string { return element.Name } - if element.Data != "" { - return element.Data + if element.Inner != "" { + return element.Inner } } -- GitLab