From 5b8eb4735c2e3a32ae1b5e56fa3b103da5602e0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= <f@miniflux.net> Date: Fri, 30 Apr 2021 22:49:17 -0700 Subject: [PATCH] Handle RSS feed title with encoded Unicode entities --- reader/rss/parser_test.go | 19 +++++++++++++++++++ reader/rss/rss.go | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index 67672127..197994c7 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -998,6 +998,25 @@ func TestParseFeedTitleWithHTMLEntity(t *testing.T) { } } +func TestParseFeedTitleWithUnicodeEntityAndCdata(t *testing.T) { + data := `<?xml version="1.0" encoding="utf-8"?> + <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"> + <channel> + <link>https://example.org/</link> + <title><![CDATA[Jenny&#8217;s Newsletter]]></title> + </channel> + </rss>` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Title != `Jenny’s Newsletter` { + t.Errorf(`Incorrect title, got: %q`, feed.Title) + } +} + func TestParseItemTitleWithHTMLEntity(t *testing.T) { data := `<?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"> diff --git a/reader/rss/rss.go b/reader/rss/rss.go index 01caada4..db082393 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -53,7 +53,7 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed { feed.FeedURL = feedURL } - feed.Title = strings.TrimSpace(r.Title) + feed.Title = html.UnescapeString(strings.TrimSpace(r.Title)) if feed.Title == "" { feed.Title = feed.SiteURL } -- GitLab