From 3e4701116d9a7a2b93646f2c9aed80b63175d206 Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Fri, 16 Aug 2019 15:29:24 +0300
Subject: [PATCH] af_readability: add missing file

---
 classes/backend.php                           |  2 +-
 classes/handler/public.php                    | 20 ++---
 classes/pluginhandler.php                     |  5 +-
 classes/pluginhost.php                        |  2 +-
 classes/rpc.php                               |  2 +-
 include/functions.php                         |  2 +-
 .../andreskrey/Readability/Configuration.php  | 26 ------
 .../Readability/Nodes/DOM/DOMNodeList.php     | 82 +++++++++++++++++++
 .../Readability/Nodes/NodeTrait.php           | 51 +++++++++---
 .../Readability/Nodes/NodeUtility.php         | 20 +++++
 .../andreskrey/Readability/Readability.php    | 56 ++++++++++---
 11 files changed, 202 insertions(+), 66 deletions(-)
 create mode 100644 plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php

diff --git a/classes/backend.php b/classes/backend.php
index 5bd724728..122e28c65 100644
--- a/classes/backend.php
+++ b/classes/backend.php
@@ -88,7 +88,7 @@ class Backend extends Handler {
 	}
 
 	function help() {
-		$topic = basename(clean($_REQUEST["topic"])); // only one for now
+		$topic = clean_filename($_REQUEST["topic"]); // only one for now
 
 		if ($topic == "main") {
 			$info = get_hotkeys_info();
diff --git a/classes/handler/public.php b/classes/handler/public.php
index 05a84494b..91413b976 100755
--- a/classes/handler/public.php
+++ b/classes/handler/public.php
@@ -1203,30 +1203,30 @@ class Handler_Public extends Handler {
 	public function pluginhandler() {
 		$host = new PluginHost();
 
-		$plugin = basename(clean($_REQUEST["plugin"]));
+		$plugin_name = clean_filename($_REQUEST["plugin"]);
 		$method = clean($_REQUEST["pmethod"]);
 
-		$host->load($plugin, PluginHost::KIND_USER, 0);
+		$host->load($plugin_name, PluginHost::KIND_USER, 0);
 		$host->load_data();
 
-		$pclass = $host->get_plugin($plugin);
+		$plugin = $host->get_plugin($plugin_name);
 
-		if ($pclass) {
-			if (method_exists($pclass, $method)) {
-				if ($pclass->is_public_method($method)) {
-					$pclass->$method();
+		if ($plugin) {
+			if (method_exists($plugin, $method)) {
+				if ($plugin->is_public_method($method)) {
+					$plugin->$method();
 				} else {
-					user_error("pluginhandler: Requested private method '$method' of plugin '$plugin'.");
+					user_error("PluginHandler[PUBLIC]: Requested private method '$method' of plugin '$plugin_name'.", E_USER_WARNING);
 					header("Content-Type: text/json");
 					print error_json(6);
 				}
 			} else {
-				user_error("pluginhandler: Requested unknown method '$method' of plugin '$plugin'.");
+				user_error("PluginHandler[PUBLIC]: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING);
 				header("Content-Type: text/json");
 				print error_json(13);
 			}
 		} else {
-			user_error("pluginhandler: Requested method '$method' of unknown plugin '$plugin'.");
+			user_error("PluginHandler[PUBLIC]: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING);
 			header("Content-Type: text/json");
 			print error_json(14);
 		}
diff --git a/classes/pluginhandler.php b/classes/pluginhandler.php
index d10343e09..9682e440f 100644
--- a/classes/pluginhandler.php
+++ b/classes/pluginhandler.php
@@ -5,15 +5,18 @@ class PluginHandler extends Handler_Protected {
 	}
 
 	function catchall($method) {
-		$plugin = PluginHost::getInstance()->get_plugin(clean($_REQUEST["plugin"]));
+		$plugin_name = clean($_REQUEST["plugin"]);
+		$plugin = PluginHost::getInstance()->get_plugin($plugin_name);
 
 		if ($plugin) {
 			if (method_exists($plugin, $method)) {
 				$plugin->$method();
 			} else {
+				user_error("PluginHandler: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING);
 				print error_json(13);
 			}
 		} else {
+			user_error("PluginHandler: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING);
 			print error_json(14);
 		}
 	}
diff --git a/classes/pluginhost.php b/classes/pluginhost.php
index 9330e9e5e..eab808ae9 100755
--- a/classes/pluginhost.php
+++ b/classes/pluginhost.php
@@ -186,7 +186,7 @@ class PluginHost {
 
 		foreach ($plugins as $class) {
 			$class = trim($class);
-			$class_file = strtolower(basename($class));
+			$class_file = strtolower(clean_filename($class));
 
 			if (!is_dir(__DIR__."/../plugins/$class_file") &&
 					!is_dir(__DIR__."/../plugins.local/$class_file")) continue;
diff --git a/classes/rpc.php b/classes/rpc.php
index 8736cbb65..84c9cfe92 100755
--- a/classes/rpc.php
+++ b/classes/rpc.php
@@ -572,7 +572,7 @@ class RPC extends Handler_Protected {
 
 	function log() {
 		$msg = clean($_REQUEST['msg']);
-		$file = basename(clean($_REQUEST['file']));
+		$file = clean_filename($_REQUEST['file']);
 		$line = (int) clean($_REQUEST['line']);
 		$context = clean($_REQUEST['context']);
 
diff --git a/include/functions.php b/include/functions.php
index c326ac468..e78f0de9d 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -593,7 +593,7 @@
 	}
 
 	function clean_filename($filename) {
-		return basename(preg_replace("/\.\.|[\/\\\]/", "", $filename));
+		return basename(preg_replace("/\.\.|[\/\\\]/", "", clean($filename)));
 	}
 
 	function make_password($length = 12) {
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php
index 6c17bc757..0632399c6 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php
@@ -166,32 +166,6 @@ class Configuration
         return $this;
     }
 
-    /**
-     * @deprecated Use getCharThreshold. Will be removed in version 2.0
-     *
-     * @return int
-     */
-    public function getWordThreshold()
-    {
-        @trigger_error('getWordThreshold was replaced with getCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED);
-
-        return $this->charThreshold;
-    }
-
-    /**
-     * @param int $charThreshold
-     *
-     * @return $this
-     */
-    public function setWordThreshold($charThreshold)
-    {
-        @trigger_error('setWordThreshold was replaced with setCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED);
-
-        $this->charThreshold = $charThreshold;
-
-        return $this;
-    }
-
     /**
      * @return bool
      */
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php
new file mode 100644
index 000000000..5149c0b98
--- /dev/null
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php
@@ -0,0 +1,82 @@
+<?php
+
+namespace andreskrey\Readability\Nodes\DOM;
+
+/**
+ * Class DOMNodeList.
+ *
+ * Array-backed stand-in for the native DOMNodeList. The native list is read-only and is populated by the DOM
+ * extension itself, so nodes cannot be appended to it. This class mirrors the parts of its API used here (length,
+ * item(), count() and iteration) and adds add() to insert new DOMNodes into the list.
+ *
+ * It cannot extend the original DOMNodeList class because the functionality behind the property ->length is hidden
+ * from the user and cannot be extended, changed, or tweaked.
+ */
+class DOMNodeList implements \Countable, \IteratorAggregate
+{
+    /**
+     * @var array Nodes held by the list, in insertion order
+     */
+    protected $items = [];
+
+    /**
+     * @var int Number of nodes added so far
+     */
+    protected $length = 0;
+
+    /**
+     * Expose the read-only "length" property in the same way that the native DOMNodeList allows.
+     *
+     * Any other property name triggers an "Undefined property" error and evaluates to null.
+     */
+    public function __get($name)
+    {
+        if ($name === 'length') {
+            return $this->length;
+        }
+        trigger_error(sprintf('Undefined property: %s::%s', static::class, $name));
+
+        return null;
+    }
+
+    /**
+     * @param DOMNode|DOMElement|DOMComment $node Node to append to the end of the list
+     *
+     * @return DOMNodeList This list, so calls can be chained
+     */
+    public function add($node)
+    {
+        $this->items[] = $node;
+        $this->length++;
+
+        return $this;
+    }
+
+    /**
+     * @param int $offset Zero-based position of the wanted node
+     *
+     * @return DOMNode|DOMElement|DOMComment|null Null when $offset is out of range, like the native item()
+     */
+    public function item(int $offset)
+    {
+        return $this->items[$offset] ?? null;
+    }
+
+    /**
+     * @return int Number of nodes in the list (same value as the length property)
+     */
+    public function count(): int
+    {
+        return $this->length;
+    }
+
+    /**
+     * To make it compatible with foreach and the iterator_to_array() function.
+     *
+     * @return \ArrayIterator Iterator over the stored nodes, in insertion order
+     */
+    public function getIterator(): \ArrayIterator
+    {
+        return new \ArrayIterator($this->items);
+    }
+}
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php
index d7060ccbb..5198bbb5f 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php
@@ -181,11 +181,11 @@ trait NodeTrait
     /**
      * Override for native hasAttribute.
      *
-     * @see getAttribute
-     *
      * @param $attributeName
      *
      * @return bool
+     *
+     * @see getAttribute
      */
     public function hasAttribute($attributeName)
     {
@@ -317,10 +317,14 @@ trait NodeTrait
      *
      * @param bool $filterEmptyDOMText Filter empty DOMText nodes?
      *
+     * @deprecated Use NodeUtility::filterTextNodes, function will be removed in version 3.0
+     *
      * @return array
      */
     public function getChildren($filterEmptyDOMText = false)
     {
+        @trigger_error('getChildren was replaced with NodeUtility::filterTextNodes and will be removed in version 3.0', E_USER_DEPRECATED);
+
         $ret = iterator_to_array($this->childNodes);
         if ($filterEmptyDOMText) {
             // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
@@ -418,12 +422,12 @@ trait NodeTrait
     public function hasSingleTagInsideElement($tag)
     {
         // There should be exactly 1 element child with given tag
-        if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) {
+        if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) {
             return false;
         }
 
         // And there should be no text nodes with real content
-        return array_reduce($children, function ($carry, $child) {
+        return array_reduce(iterator_to_array($children), function ($carry, $child) {
             if (!$carry === false) {
                 return false;
             }
@@ -443,7 +447,7 @@ trait NodeTrait
     {
         $result = false;
         if ($this->hasChildNodes()) {
-            foreach ($this->getChildren() as $child) {
+            foreach ($this->childNodes as $child) {
                 if (in_array($child->nodeName, $this->divToPElements)) {
                     $result = true;
                 } else {
@@ -500,18 +504,22 @@ trait NodeTrait
             );
     }
 
+    /**
+     * The original JS project checks whether the node has the style display=none, which unfortunately we have
+     * no way of knowing in our case. So we just check for the hidden attribute or an inline "display: none".
+     *
+     * Might be a good idea to check for classes or other attributes like 'aria-hidden'
+     *
+     * @return bool
+     */
     public function isProbablyVisible()
     {
-        /*
-         * In the original JS project they check if the node has the style display=none, which unfortunately
-         * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none".
-         *
-         * Might be a good idea to check for classes or other attributes like 'aria-hidden'
-         */
-
         return !preg_match('/display:( )?none/', $this->getAttribute('style')) && !$this->hasAttribute('hidden');
     }
 
+    /**
+     * @return bool
+     */
     public function isWhitespace()
     {
         return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) ||
@@ -557,4 +565,23 @@ trait NodeTrait
             $count -= ($count - $nodes->length);
         }
     }
+
+    /**
+     * PHP counterpart of JS's firstElementChild property. firstChild may be any type of DOMNode, so this
+     * walks the child nodes and hands back the first one that is a DOMElement.
+     *
+     * @return \DOMElement|null Null when there is no element among the children
+     */
+    public function getFirstElementChild()
+    {
+        // A child list that cannot be iterated is handled the same way as an empty one.
+        $children = $this->childNodes instanceof \Traversable ? $this->childNodes : [];
+        foreach ($children as $child) {
+            if ($child instanceof \DOMElement) {
+                return $child;
+            }
+        }
+
+        return null;
+    }
 }
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php
index 7a1f18ee4..cbf78bae0 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php
@@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes;
 use andreskrey\Readability\Nodes\DOM\DOMDocument;
 use andreskrey\Readability\Nodes\DOM\DOMElement;
 use andreskrey\Readability\Nodes\DOM\DOMNode;
+use andreskrey\Readability\Nodes\DOM\DOMNodeList;
 
 /**
  * Class NodeUtility.
@@ -157,4 +158,23 @@ class NodeUtility
 
         return ($originalNode) ? $originalNode->nextSibling : $originalNode;
     }
+
+    /**
+     * Copy a node list, leaving out text nodes that are empty or contain only whitespace.
+     *
+     * @param \DOMNodeList $list Native list to filter, e.g. the childNodes of a node
+     *
+     * @return DOMNodeList Array-backed list with the remaining nodes in their original order
+     */
+    public static function filterTextNodes(\DOMNodeList $list)
+    {
+        $filtered = new DOMNodeList();
+        foreach ($list as $candidate) {
+            $isBlankText = $candidate->nodeType === XML_TEXT_NODE && !mb_strlen(trim($candidate->nodeValue));
+            if (!$isBlankText) {
+                $filtered->add($candidate);
+            }
+        }
+        return $filtered;
+    }
 }
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php
index 7b7eed6bf..6bcbf78d7 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php
@@ -56,6 +56,13 @@ class Readability
      */
     protected $author = null;
 
+    /**
+     * Website name.
+     *
+     * @var string|null
+     */
+    protected $siteName = null;
+
     /**
      * Direction of the text.
      *
@@ -287,10 +294,10 @@ class Readability
 
         $values = [];
         // property is a space-separated list of values
-        $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image)\s*/i';
+        $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image|site_name)(?!:)\s*/i';
 
         // name is a single value
-        $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image)\s*$/i';
+        $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image|site_name)(?!:)\s*$/i';
 
         // Find description tags.
         foreach ($this->dom->getElementsByTagName('meta') as $meta) {
@@ -332,7 +339,6 @@ class Readability
          * This could be easily replaced with an ugly set of isset($values['key']) or a bunch of ??s.
          * Will probably replace it with ??s after dropping support of PHP5.6
          */
-
         $key = current(array_intersect([
             'dc:title',
             'dcterm:title',
@@ -373,11 +379,18 @@ class Readability
 
         // get main image
         $key = current(array_intersect([
+            'image',
             'og:image',
             'twitter:image'
         ], array_keys($values)));
 
         $this->setImage(isset($values[$key]) ? $values[$key] : null);
+
+        $key = current(array_intersect([
+            'og:site_name'
+        ], array_keys($values)));
+
+        $this->setSiteName(isset($values[$key]) ? $values[$key] : null);
     }
 
     /**
@@ -722,7 +735,7 @@ class Readability
                  */
                 if ($node->hasSingleTagInsideElement('p') && $node->getLinkDensity() < 0.25) {
                     $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
-                    $pNode = $node->getChildren(true)[0];
+                    $pNode = NodeUtility::filterTextNodes($node->childNodes)->item(0);
                     $node->parentNode->replaceChild($pNode, $node);
                     $node = $pNode;
                     $elementsToScore[] = $node;
@@ -1082,7 +1095,7 @@ class Readability
             // If the top candidate is the only child, use parent instead. This will help sibling
             // joining logic when adjacent content is actually located in parent's sibling node.
             $parentOfTopCandidate = $topCandidate->parentNode;
-            while ($parentOfTopCandidate->nodeName !== 'body' && count($parentOfTopCandidate->getChildren(true)) === 1) {
+            while ($parentOfTopCandidate->nodeName !== 'body' && count(NodeUtility::filterTextNodes($parentOfTopCandidate->childNodes)) === 1) {
                 $topCandidate = $parentOfTopCandidate;
                 $parentOfTopCandidate = $topCandidate->parentNode;
             }
@@ -1102,14 +1115,16 @@ class Readability
         $siblingScoreThreshold = max(10, $topCandidate->contentScore * 0.2);
         // Keep potential top candidate's parent node to try to get text direction of it later.
         $parentOfTopCandidate = $topCandidate->parentNode;
-        $siblings = $parentOfTopCandidate->getChildren();
+        $siblings = $parentOfTopCandidate->childNodes;
 
         $hasContent = false;
 
         $this->logger->info('[Rating] Adding top candidate siblings...');
 
-        /** @var DOMElement $sibling */
-        foreach ($siblings as $sibling) {
+        /* @var DOMElement $sibling */
+        // Can't foreach here because down there we might change the tag name and that causes the foreach to skip items
+        for ($i = 0; $i < $siblings->length; $i++) {
+            $sibling = $siblings[$i];
             $append = false;
 
             if ($sibling === $topCandidate) {
@@ -1147,7 +1162,6 @@ class Readability
                      * We have a node that isn't a common block level element, like a form or td tag.
                      * Turn it into a div so it doesn't get filtered out later by accident.
                      */
-
                     $sibling = NodeUtility::setNodeTag($sibling, 'div');
                 }
 
@@ -1266,11 +1280,11 @@ class Readability
         // Remove single-cell tables
         foreach ($article->shiftingAwareGetElementsByTagName('table') as $table) {
             /** @var DOMNode $table */
-            $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->childNodes[0] : $table;
+            $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->getFirstElementChild() : $table;
             if ($tbody->hasSingleTagInsideElement('tr')) {
-                $row = $tbody->firstChild;
+                $row = $tbody->getFirstElementChild();
                 if ($row->hasSingleTagInsideElement('td')) {
-                    $cell = $row->firstChild;
+                    $cell = $row->getFirstElementChild();
                     $cell = NodeUtility::setNodeTag($cell, (array_reduce(iterator_to_array($cell->childNodes), function ($carry, $node) {
                         return $node->isPhrasingContent() && $carry;
                     }, true)) ? 'p' : 'div');
@@ -1597,7 +1611,7 @@ class Readability
             $node->removeAttribute('class');
         }
 
-        for ($node = $node->firstChild; $node !== null; $node = $node->nextSibling) {
+        for ($node = $node->getFirstElementChild(); $node !== null; $node = $node->nextSibling) {
             $this->_cleanClasses($node);
         }
     }
@@ -1756,6 +1770,22 @@ class Readability
         $this->author = $author;
     }
 
+    /**
+     * @return string|null Website name stored via setSiteName() (og:site_name metadata), null when none was found
+     */
+    public function getSiteName()
+    {
+        return $this->siteName;
+    }
+
+    /**
+     * @param string|null $siteName Website name, or null when the page metadata does not provide one
+     */
+    protected function setSiteName($siteName)
+    {
+        $this->siteName = $siteName;
+    }
+
     /**
      * @return null|string
      */
-- 
GitLab