From 74a247fc5c4e458f1aed118ed346f7ec53de5c3d Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Thu, 17 Feb 2022 22:38:38 +0300
Subject: [PATCH] rewrite_relative: whitelist specific schemes for URLs with
 'known' content-types i.e. specified for enclosures

---
 classes/rssutils.php  |  2 +-
 classes/urlhelper.php | 17 ++++++++++++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/classes/rssutils.php b/classes/rssutils.php
index d7284a7bc..9995b0e43 100755
--- a/classes/rssutils.php
+++ b/classes/rssutils.php
@@ -736,7 +736,7 @@ class RSSUtils {
 
 						// TODO: Just use FeedEnclosure (and modify it to cover whatever justified this)?
 						$e_item = array(
-							UrlHelper::rewrite_relative($site_url, $e->link),
+							UrlHelper::rewrite_relative($site_url, $e->link, "", "", $e->type),
 							$e->type, $e->length, $e->title, $e->width, $e->height);
 
 						// Yet another episode of "mysql utf8_general_ci is gimped"
diff --git a/classes/urlhelper.php b/classes/urlhelper.php
index 9ac7781ef..22fe067d5 100644
--- a/classes/urlhelper.php
+++ b/classes/urlhelper.php
@@ -6,6 +6,10 @@ class UrlHelper {
 		"tel"
 	];
 
+	const EXTRA_SCHEMES_BY_CONTENT_TYPE = [
+		"application/x-bittorrent" => [ "magnet" ],
+	];
+
 	// TODO: class properties can be switched to PHP typing if/when the minimum PHP_VERSION is raised to 7.4.0+
 	/** @var string */
 	static $fetch_last_error;
@@ -52,10 +56,16 @@ class UrlHelper {
 	 * @param string $rel_url Possibly relative URL in the document
 	 * @param string $owner_element Owner element tag name (i.e. "a") (optional)
 	 * @param string $owner_attribute Owner attribute (i.e. "href") (optional)
+	 * @param string $content_type Content type declared for the URL, e.g. by a feed enclosure; allows extra schemes for known types (optional)
 	 *
 	 * @return false|string Absolute URL or false on failure (either during URL parsing or validation)
 	 */
-	public static function rewrite_relative($base_url, $rel_url, string $owner_element = "", string $owner_attribute = "") {
+	public static function rewrite_relative($base_url,
+				$rel_url,
+				string $owner_element = "",
+				string $owner_attribute = "",
+				string $content_type = "") {
+
 		$rel_parts = parse_url($rel_url);
 
 		/**
@@ -80,6 +90,11 @@ class UrlHelper {
 				$owner_element == "a" &&
 				$owner_attribute == "href") {
 			return $rel_url;
+		// allow some extra schemes for links with a feed-specified content type, e.g. enclosures (unlisted types fall through)
+		} else if ($content_type &&
+				array_key_exists($content_type, self::EXTRA_SCHEMES_BY_CONTENT_TYPE) &&
+				in_array($rel_parts["scheme"] ?? "", self::EXTRA_SCHEMES_BY_CONTENT_TYPE[$content_type], true)) {
+			return $rel_url;
 		// allow limited subset of inline base64-encoded images for IMG elements
 		} else if (($rel_parts["scheme"] ?? "") == "data" &&
 				preg_match('%^image/(webp|gif|jpg|png|svg);base64,%', $rel_parts["path"]) &&
-- 
GitLab