From 22a866edb5ea406bbd30ca777b58099ce9f55d1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Kapp=C3=A9?= <tobias.kappe@gmail.com>
Date: Sun, 12 Aug 2018 14:54:13 +0100
Subject: [PATCH] Store language of entries as indicated by the feed.

---
 classes/feeditem.php      |  1 +
 classes/feeditem/atom.php |  9 +++++++++
 classes/feeditem/rss.php  | 12 +++++++++++-
 classes/feedparser.php    |  2 +-
 classes/rssutils.php      |  5 +++--
 5 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/classes/feeditem.php b/classes/feeditem.php
index 594f56984..3a5e5dc09 100644
--- a/classes/feeditem.php
+++ b/classes/feeditem.php
@@ -11,5 +11,6 @@ abstract class FeedItem {
 	abstract function get_categories();
 	abstract function get_enclosures();
 	abstract function get_author();
+	abstract function get_language();
 }
 
diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php
index 77cd448b8..6e7a904f8 100644
--- a/classes/feeditem/atom.php
+++ b/classes/feeditem/atom.php
@@ -197,4 +197,13 @@ class FeedItem_Atom extends FeedItem_Common {
 		return $encs;
 	}
 
+	/** First non-empty xml:lang walking up from this entry's element; else the last (empty) lookup. */
+	function get_language() {
+		$xml_ns = "http://www.w3.org/XML/1998/namespace";
+		for ($node = $this->elem; ; $node = $node->parentNode) {
+			$lang = $node->getAttributeNS($xml_ns, "lang");
+			// Stop on a hit, or once the parent is no longer a DOMElement.
+			if (!empty($lang) || !($node->parentNode instanceof DOMElement)) return $lang;
+		}
+	}
 }
diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php
index a3fa7e636..dca125be6 100644
--- a/classes/feeditem/rss.php
+++ b/classes/feeditem/rss.php
@@ -189,4 +189,14 @@ class FeedItem_RSS extends FeedItem_Common {
 		return $encs;
 	}
 
-}
\ No newline at end of file
+	/** Text of the first <language> element found via $this->doc, or "" when there is none. */
+	function get_language() {
+		$languages = $this->doc->getElementsByTagName('language');
+		// Rely on ->length/->item(): DOMNodeList is only Countable since PHP 7.2, so
+		// count() is not a dependable emptiness check on older interpreters.
+		$first = $languages->length > 0 ? $languages->item(0) : null;
+
+		return $first !== null ? $first->textContent : "";
+	}
+
+}
diff --git a/classes/feedparser.php b/classes/feedparser.php
index 860ebd73f..a5e406149 100644
--- a/classes/feedparser.php
+++ b/classes/feedparser.php
@@ -283,4 +283,4 @@ class FeedParser {
 
 		return $rv;
 	}
-}
\ No newline at end of file
+}
diff --git a/classes/rssutils.php b/classes/rssutils.php
index b69bb25a0..6fa1e9f4f 100755
--- a/classes/rssutils.php
+++ b/classes/rssutils.php
@@ -637,8 +637,11 @@ class RSSUtils {
 
 				$entry_link = rewrite_relative_url($site_url, $item->get_link());
 
+				$entry_language = $item->get_language();
+
 				_debug("title $entry_title", $debug_enabled);
 				_debug("link $entry_link", $debug_enabled);
+				_debug("language $entry_language", $debug_enabled);
 
 				if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
 
@@ -694,7 +697,6 @@ class RSSUtils {
 					$base_entry_id = $row["id"];
 					$entry_stored_hash = $row["content_hash"];
 					$article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
-					$entry_language = $row["lang"];
 
 					$existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
 					$entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
@@ -702,7 +704,6 @@ class RSSUtils {
 					$base_entry_id = false;
 					$entry_stored_hash = "";
 					$article_labels = array();
-					$entry_language = "";
 				}
 
 				$article = array("owner_uid" => $owner_uid, // read only
-- 
GitLab