From 1d6b0491a75687553fa9c37b68cd5f71aa6fee6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= <fred@miniflux.net>
Date: Mon, 29 Jun 2020 18:08:19 -0700
Subject: [PATCH] Ignore <media:title> in RSS 2.0 feeds

In the vast majority of cases, the default entry title is correct.

Ignoring <media:title> avoids overriding the default title when the two differ.
---
 reader/rss/parser_test.go | 45 +++++++++++++++++++++++++++++++++++++++
 reader/rss/rss.go         | 27 +++++++++++++++++++++--
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index e41eec88..16dd1c2d 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -136,6 +136,51 @@ func TestParseEntryWithoutTitle(t *testing.T) {
 	}
 }
 
+func TestParseEntryWithMediaTitle(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+		<channel>
+			<link>https://example.org/</link>
+			<item>
+				<title>Entry Title</title>
+				<link>https://example.org/item</link>
+				<media:title>Media Title</media:title>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != "Entry Title" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
+func TestParseEntryWithDCTitleOnly(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:dc="http://purl.org/dc/elements/1.1/">
+		<channel>
+			<link>https://example.org/</link>
+			<item>
+				<dc:title>Entry Title</dc:title>
+				<link>https://example.org/item</link>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != "Entry Title" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
 func TestParseEntryWithoutLink(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index 3619ec68..cbb1bd19 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -122,6 +122,12 @@ type rssAuthor struct {
 	Inner   string `xml:",innerxml"`
 }
 
+type rssTitle struct {
+	XMLName xml.Name
+	Data    string `xml:",chardata"`
+	Inner   string `xml:",innerxml"`
+}
+
 type rssEnclosure struct {
 	URL    string `xml:"url,attr"`
 	Type   string `xml:"type,attr"`
@@ -138,7 +144,7 @@ func (enclosure *rssEnclosure) Size() int64 {
 
 type rssItem struct {
 	GUID           string           `xml:"guid"`
-	Title          string           `xml:"title"`
+	Title          []rssTitle       `xml:"title"`
 	Links          []rssLink        `xml:"link"`
 	Description    string           `xml:"description"`
 	PubDate        string           `xml:"pubDate"`
@@ -223,7 +229,24 @@ func (r *rssItem) entryHash() string {
 }
 
 func (r *rssItem) entryTitle() string {
-	return strings.TrimSpace(sanitizer.StripTags(r.Title))
+	var title string
+
+	for _, rssTitle := range r.Title {
+		switch rssTitle.XMLName.Space {
+		case "http://search.yahoo.com/mrss/":
+			// Ignore title in media namespace
+		case "http://purl.org/dc/elements/1.1/":
+			title = rssTitle.Data
+		default:
+			title = rssTitle.Data
+		}
+
+		if title != "" {
+			break
+		}
+	}
+
+	return strings.TrimSpace(sanitizer.StripTags(title))
 }
 
 func (r *rssItem) entryContent() string {
-- 
GitLab