From 6ea4da3bce2cc9321926be74d0cbc6ad0f01f363 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= <fred@miniflux.net>
Date: Sun, 18 Mar 2018 11:57:46 -0700
Subject: [PATCH] Handle RSS author elements with inner HTML

---
 reader/rss/parser_test.go | 25 +++++++++++++++++++++++++
 reader/rss/rss.go         |  8 +++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index 7bf9f755..6d9015d2 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -230,6 +230,31 @@ func TestParseFeedURLWithAtomLink(t *testing.T) {
 	}
 }
 
+// TestParseEntryWithAuthorAndInnerHTML checks that HTML nested in an RSS <author> element is reduced to its text.
+func TestParseEntryWithAuthorAndInnerHTML(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
+		<channel>
+			<title>Example</title>
+			<link>https://example.org/</link>
+			<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
+			<item>
+				<title>Test</title>
+				<link>https://example.org/item</link>
+				<author>by <a itemprop="url" class="author" rel="author" href="/author/foobar">Foo Bar</a></author>
+			</item>
+		</channel>
+		</rss>`
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err) // stop here: feed is dereferenced below and cannot be relied on after a parse error
+	}
+
+	if feed.Entries[0].Author != "by Foo Bar" {
+		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+	}
+}
+
 func TestParseEntryWithAtomAuthor(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index 041cd198..1cd82bae 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -15,6 +15,7 @@ import (
 	"github.com/miniflux/miniflux/logger"
 	"github.com/miniflux/miniflux/model"
 	"github.com/miniflux/miniflux/reader/date"
+	"github.com/miniflux/miniflux/reader/sanitizer"
 	"github.com/miniflux/miniflux/url"
 )
 
@@ -56,6 +57,7 @@ type rssAuthor struct {
 	XMLName xml.Name
 	Data    string `xml:",chardata"`
 	Name    string `xml:"name"`
+	Inner   string `xml:",innerxml"`
 }
 
 type rssEnclosure struct {
@@ -100,7 +102,7 @@ func (r *rssFeed) Transform() *model.Feed {
 		if entry.Author == "" && r.ItunesAuthor != "" {
 			entry.Author = r.ItunesAuthor
 		}
-		entry.Author = strings.TrimSpace(entry.Author)
+		entry.Author = strings.TrimSpace(sanitizer.StripTags(entry.Author))
 
 		if entry.URL == "" {
 			entry.URL = feed.SiteURL
@@ -146,8 +148,8 @@ func (r *rssItem) GetAuthor() string {
 			return element.Name
 		}
 
-		if element.Data != "" {
-			return element.Data
+		if element.Inner != "" {
+			return element.Inner
 		}
 	}
 
-- 
GitLab