From c70e26db31d520c554b867325ace95cbee6687e3 Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Mon, 28 Sep 2020 19:46:31 +0300
Subject: [PATCH] validate url: feed urlencoded() URL to filter_var() only

---
 classes/urlhelper.php | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/classes/urlhelper.php b/classes/urlhelper.php
index 461d5fb7e..d7b7d004a 100644
--- a/classes/urlhelper.php
+++ b/classes/urlhelper.php
@@ -64,13 +64,6 @@ class UrlHelper {
 		if (!in_array(strtolower($tokens['scheme']), ['http', 'https']))
 			return false;
 
-		if ($tokens['path']) {
-			$tokens['path'] = implode("/",
-										array_map("rawurlencode",
-											array_map("rawurldecode",
-												explode("/", $tokens['path']))));
-		}
-
 		//convert IDNA hostname to punycode if possible
 		if (function_exists("idn_to_ascii")) {
 			if (mb_detect_encoding($tokens['host']) != 'ASCII') {
@@ -78,9 +71,21 @@ class UrlHelper {
 			}
 		}
 
+		// Build a separate token set whose 'path' is percent-encoded, because filter_var() only accepts ASCII URLs and rejects non-ASCII characters.
+		// (This copy is used for validation only; the original URL is what actually gets requested, in case re-encoding would break it.)
+		$tokens_filter_var = $tokens;
+
+		if ($tokens['path']) {
+			$tokens_filter_var['path'] = implode("/",
+										array_map("rawurlencode",
+											array_map("rawurldecode",
+												explode("/", $tokens['path']))));
+		}
+
 		$url = self::build_url($tokens);
+		$url_filter_var = self::build_url($tokens_filter_var);
 
-		if (filter_var($url, FILTER_VALIDATE_URL) === false)
+		if (filter_var($url_filter_var, FILTER_VALIDATE_URL) === false)
 			return false;
 
 		if ($extended_filtering) {
-- 
GitLab