From e9c062a189cfad71922fc576d636610da18006d4 Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Fri, 18 Jun 2021 11:20:57 +0300
Subject: [PATCH] UrlHelper::rewrite_relative():

 - support being invoked with the URL's owner element/attribute specified
 - restrict mailto/magnet/tel schemes to A href
 - allow some data: base64 image types for IMG src

Sanitizer::sanitize():

 - when checking href and src attributes, pass element tagname and attribute to rewrite_relative()
---
 classes/sanitizer.php |  6 +++---
 classes/urlhelper.php | 21 +++++++++++++++++----
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/classes/sanitizer.php b/classes/sanitizer.php
index 07766dc16..0a444a296 100644
--- a/classes/sanitizer.php
+++ b/classes/sanitizer.php
@@ -74,7 +74,7 @@ class Sanitizer {
 
 			if ($entry->hasAttribute('href')) {
 				$entry->setAttribute('href',
-					rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
+					UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('href'), $entry->tagName, "href"));
 
 				$entry->setAttribute('rel', 'noopener noreferrer');
 				$entry->setAttribute("target", "_blank");
@@ -82,7 +82,7 @@ class Sanitizer {
 
 			if ($entry->hasAttribute('src')) {
 				$entry->setAttribute('src',
-					rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src')));
+					UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('src'), $entry->tagName, "src"));
 			}
 
 			if ($entry->nodeName == 'img') {
@@ -94,7 +94,7 @@ class Sanitizer {
 				$matches = RSSUtils::decode_srcset($entry->getAttribute('srcset'));
 
 				for ($i = 0; $i < count($matches); $i++) {
-					$matches[$i]["url"] = rewrite_relative_url($rewrite_base_url, $matches[$i]["url"]);
+					$matches[$i]["url"] = UrlHelper::rewrite_relative($rewrite_base_url, $matches[$i]["url"]);
 				}
 
 				$entry->setAttribute("srcset", RSSUtils::encode_srcset($matches));
diff --git a/classes/urlhelper.php b/classes/urlhelper.php
index 648d609a4..b4545939f 100644
--- a/classes/urlhelper.php
+++ b/classes/urlhelper.php
@@ -1,6 +1,6 @@
 <?php
 class UrlHelper {
-	const ALLOWED_RELATIVE_SCHEMES = [
+	const EXTRA_HREF_SCHEMES = [
 		"magnet",
 		"mailto",
 		"tel"
@@ -27,22 +27,35 @@ class UrlHelper {
 
 	/**
 	 * Converts a (possibly) relative URL to a absolute one, using provided base URL.
+	 * Provides some exceptions for additional schemes like data: if called with owning element/attribute.
 	 *
 	 * @param string $base_url     Base URL (i.e. from where the document is)
 	 * @param string $rel_url Possibly relative URL in the document
+	 * @param string $owner_element Owner node tag name (e.g. "a") (optional)
+	 * @param string $owner_attribute Owner attribute (e.g. "href") (optional)
 	 *
 	 * @return string Absolute URL
 	 */
-	public static function rewrite_relative($base_url, $rel_url) {
+	public static function rewrite_relative($base_url, $rel_url, string $owner_element = "", string $owner_attribute = "") {
 
 		$rel_parts = parse_url($rel_url);
 
 		if (!empty($rel_parts['host']) && !empty($rel_parts['scheme'])) {
 			return self::validate($rel_url);
+
+		// protocol-relative URL (rare but they exist)
 		} else if (strpos($rel_url, "//") === 0) {
-			# protocol-relative URL (rare but they exist)
 			return self::validate("https:" . $rel_url);
-		} else if (array_search($rel_parts["scheme"] ?? "", self::ALLOWED_RELATIVE_SCHEMES, true) !== false) {
+		// allow some extra schemes for A href
+		} else if (in_array($rel_parts["scheme"] ?? "", self::EXTRA_HREF_SCHEMES) &&
+				$owner_element == "a" &&
+				$owner_attribute == "href") {
+			return $rel_url;
+		// allow limited subset of inline base64-encoded images for IMG elements
+		} else if ($rel_parts["scheme"] == "data" &&
+				preg_match('%^image/(webp|gif|jpg|png|svg);base64,%', $rel_parts["path"]) &&
+				$owner_element == "img" &&
+				$owner_attribute == "src") {
 			return $rel_url;
 		} else {
 			$base_parts = parse_url($base_url);
-- 
GitLab