diff --git a/classes/sanitizer.php b/classes/sanitizer.php index 07766dc16282fb231a97f22de08e719f770ce5dc..0a444a2966d8a49c20ce42986c7005e4b7c815b0 100644 --- a/classes/sanitizer.php +++ b/classes/sanitizer.php @@ -74,7 +74,7 @@ class Sanitizer { if ($entry->hasAttribute('href')) { $entry->setAttribute('href', - rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href'))); + UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('href'), $entry->tagName, "href")); $entry->setAttribute('rel', 'noopener noreferrer'); $entry->setAttribute("target", "_blank"); @@ -82,7 +82,7 @@ class Sanitizer { if ($entry->hasAttribute('src')) { $entry->setAttribute('src', - rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'))); + UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('src'), $entry->tagName, "src")); } if ($entry->nodeName == 'img') { @@ -94,7 +94,7 @@ class Sanitizer { $matches = RSSUtils::decode_srcset($entry->getAttribute('srcset')); for ($i = 0; $i < count($matches); $i++) { - $matches[$i]["url"] = rewrite_relative_url($rewrite_base_url, $matches[$i]["url"]); + $matches[$i]["url"] = UrlHelper::rewrite_relative($rewrite_base_url, $matches[$i]["url"]); } $entry->setAttribute("srcset", RSSUtils::encode_srcset($matches)); diff --git a/classes/urlhelper.php b/classes/urlhelper.php index 648d609a47c6127a0ee867070dc91342a8e18408..b4545939f2c24927c688844567db822dabded4db 100644 --- a/classes/urlhelper.php +++ b/classes/urlhelper.php @@ -1,6 +1,6 @@ <?php class UrlHelper { - const ALLOWED_RELATIVE_SCHEMES = [ + const EXTRA_HREF_SCHEMES = [ "magnet", "mailto", "tel" @@ -27,22 +27,35 @@ class UrlHelper { /** * Converts a (possibly) relative URL to a absolute one, using provided base URL. + * Provides some exceptions for additional schemes like data: if called with owning element/attribute. * * @param string $base_url Base URL (i.e. from where the document is) * @param string $rel_url Possibly relative URL in the document + * @param string $owner_element Owner node tag name (i.e. A) (optional) + * @param string $owner_attribute Owner attribute (i.e. href) (optional) * * @return string Absolute URL */ - public static function rewrite_relative($base_url, $rel_url) { + public static function rewrite_relative($base_url, $rel_url, string $owner_element = "", string $owner_attribute = "") { $rel_parts = parse_url($rel_url); if (!empty($rel_parts['host']) && !empty($rel_parts['scheme'])) { return self::validate($rel_url); + + // protocol-relative URL (rare but they exist) } else if (strpos($rel_url, "//") === 0) { - # protocol-relative URL (rare but they exist) return self::validate("https:" . $rel_url); - } else if (array_search($rel_parts["scheme"] ?? "", self::ALLOWED_RELATIVE_SCHEMES, true) !== false) { + // allow some extra schemes for A href + } else if (in_array($rel_parts["scheme"] ?? "", self::EXTRA_HREF_SCHEMES) && + $owner_element == "a" && + $owner_attribute == "href") { + return $rel_url; + // allow limited subset of inline base64-encoded images for IMG elements + } else if ($rel_parts["scheme"] == "data" && + preg_match('%^image/(webp|gif|jpg|png|svg);base64,%', $rel_parts["path"]) && + $owner_element == "img" && + $owner_attribute == "src") { return $rel_url; } else { $base_parts = parse_url($base_url);