From 2aef804f4b2cee7baf569f707eb74c27005d7daa Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Mon, 20 Aug 2018 12:12:32 +0300
Subject: [PATCH] split transparent rewriting of locally cached media URLs to
 execute after both sanitize() and HOOK_RENDER_ARTICLE to allow plugins to work
 on original source URLs consistently

---
 classes/api.php       |  4 +++
 classes/article.php   |  2 ++
 classes/feeds.php     |  2 ++
 include/functions.php | 81 ++++++++++++++++++++++++++++++-------------
 4 files changed, 65 insertions(+), 24 deletions(-)
 mode change 100644 => 100755 classes/api.php

diff --git a/classes/api.php b/classes/api.php
old mode 100644
new mode 100755
index 4c321d77e..5dbf8dc1f
--- a/classes/api.php
+++ b/classes/api.php
@@ -379,6 +379,8 @@ class API extends Handler {
 					$article = $p->hook_render_article_api(array("article" => $article));
 				}
 
+				$article['content'] = rewrite_cached_urls($article['content']);
+
 				array_push($articles, $article);
 
 			}
@@ -799,6 +801,8 @@ class API extends Handler {
 						$headline_row = $p->hook_render_article_api(array("headline" => $headline_row));
 					}
 
+					$headline_row['content'] = rewrite_cached_urls($headline_row['content']);
+
 					array_push($headlines, $headline_row);
 				}
 			} else if (is_numeric($result) && $result == -1) {
diff --git a/classes/article.php b/classes/article.php
index c8ee5b931..71dfdabc4 100755
--- a/classes/article.php
+++ b/classes/article.php
@@ -610,6 +610,8 @@ class Article extends Handler_Protected {
 				$line = $p->hook_render_article($line);
 			}
 
+			$line['content'] = rewrite_cached_urls($line['content']);
+
 			$num_comments = (int) $line["num_comments"];
 			$entry_comments = "";
 
diff --git a/classes/feeds.php b/classes/feeds.php
index 47a6b56b8..6bf14f45e 100755
--- a/classes/feeds.php
+++ b/classes/feeds.php
@@ -477,6 +477,8 @@ class Feeds extends Handler_Protected {
 						$line = $p->hook_render_article_cdm($line);
 					}
 
+					$line['content'] = rewrite_cached_urls($line['content']);
+
 					if ($vfeed_group_enabled && $line["feed_title"]) {
 						if ($feed_id != $vgroup_last_feed) {
 
diff --git a/include/functions.php b/include/functions.php
index a4e0e4d02..8acea8ef4 100755
--- a/include/functions.php
+++ b/include/functions.php
@@ -1564,38 +1564,31 @@
 		return false;
 	}
 
-	function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) {
-		if (!$owner) $owner = $_SESSION["uid"];
-
-		$res = trim($str); if (!$res) return '';
+	// check for locally cached (media) URLs and rewrite them to local versions
+	// this is called separately after sanitize() and plugin render article hooks to allow
+	// plugins to work on the original source URLs used before caching
 
+	function rewrite_cached_urls($str) {
 		$charset_hack = '<head>
 				<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
 			</head>';
 
-		$res = trim($res); if (!$res) return '';
-
-		libxml_use_internal_errors(true);
+		$res = trim($str); if (!$res) return '';
 
 		$doc = new DOMDocument();
 		$doc->loadHTML($charset_hack . $res);
 		$xpath = new DOMXPath($doc);
 
-		$rewrite_base_url = $site_url ? $site_url : get_self_url_prefix();
+		$entries = $xpath->query('(//img[@src]|//video/source[@src]|//audio/source[@src])');
 
-		$entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])');
+		$need_saving = false;
 
 		foreach ($entries as $entry) {
 
-			if ($entry->hasAttribute('href')) {
-				$entry->setAttribute('href',
-					rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
-
-				$entry->setAttribute('rel', 'noopener noreferrer');
-			}
-
 			if ($entry->hasAttribute('src')) {
-				$src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'));
+
+				// src should already be absolute because this is called after sanitize()
+				$src = $entry->getAttribute('src');
 				$cached_filename = CACHE_DIR . '/images/' . sha1($src);
 
 				if (file_exists($cached_filename)) {
@@ -1613,14 +1606,54 @@
 
 					$src = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($src) . $suffix;
 
-					if ($entry->hasAttribute('srcset')) {
-						$entry->removeAttribute('srcset');
-					}
-
-					if ($entry->hasAttribute('sizes')) {
-						$entry->removeAttribute('sizes');
-					}
+					$entry->setAttribute('src', $src);
+					$need_saving = true;
 				}
+			}
+		}
+
+		if ($need_saving) {
+			$doc->removeChild($doc->firstChild); //remove doctype
+			$res = $doc->saveHTML();
+		}
+
+		return $res;
+	}
+
+	function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) {
+		if (!$owner) $owner = $_SESSION["uid"];
+
+		$res = trim($str); if (!$res) return '';
+
+		$charset_hack = '<head>
+				<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+			</head>';
+
+		$res = trim($res); if (!$res) return '';
+
+		libxml_use_internal_errors(true);
+
+		$doc = new DOMDocument();
+		$doc->loadHTML($charset_hack . $res);
+		$xpath = new DOMXPath($doc);
+
+		$rewrite_base_url = $site_url ? $site_url : get_self_url_prefix();
+
+		$entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])');
+
+		foreach ($entries as $entry) {
+
+			if ($entry->hasAttribute('href')) {
+				$entry->setAttribute('href',
+					rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
+
+				$entry->setAttribute('rel', 'noopener noreferrer');
+			}
+
+			if ($entry->hasAttribute('src')) {
+				$src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'));
+
+				// cache stuff has gone to rewrite_cached_urls()
 
 				$entry->setAttribute('src', $src);
 			}
-- 
GitLab