From 39ede9862f1df94b24cbe476ec66eca99a1d1a2f Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <fox@madoka.volgo-balt.ru>
Date: Mon, 29 Apr 2013 16:59:36 +0400
Subject: [PATCH] experimental: decode numerical utf entities on import in
 entry title

---
 include/rssfuncs.php | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 0ecab6a25..a5d3898ce 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -553,6 +553,7 @@
 				_debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
 
 				$entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
+				$entry_title = decode_numeric_entities($entry_title);
 
 				$entry_link = rewrite_relative_url($site_url, $item->get_link());
 
@@ -1388,4 +1389,43 @@
 
 		_debug("Cleaned $rc cached tags.");
 	}
+
+	/**
+	 * Decode one numeric character reference (e.g. "&#8212;") into UTF-8.
+	 *
+	 * @param string $entity text holding a decimal "&#NNNN;" reference
+	 * @return string $entity with references inside the convmap range decoded
+	 */
+	function utf8_entity_decode($entity){
+		// convmap layout: first code point, last code point, offset, mask.
+		$convmap = array(0x0, 0x10000, 0, 0xfffff);
+		return mb_decode_numericentity($entity, $convmap, 'UTF-8');
+	}
+
+	/**
+	 * Replace decimal ("&#NNNN;") and hexadecimal ("&#xHHHH;") character
+	 * references in $body with the UTF-8 characters they stand for.
+	 *
+	 * Uses preg_replace_callback() rather than the /e modifier, which is
+	 * deprecated as of PHP 5.5 and removed in PHP 7. If PCRE fails (for
+	 * example because $body is not valid UTF-8 and the /u pattern rejects
+	 * it) the text is returned as it was instead of being turned into NULL.
+	 *
+	 * @param string $body text that may contain numeric character references
+	 * @return string text with the matched references decoded
+	 */
+	function decode_numeric_entities($body) {
+		$decimal = preg_replace_callback('/&#\d{2,5};/u', function ($m) {
+			return utf8_entity_decode($m[0]);
+		}, $body);
+		if ($decimal === null) return $body;
+
+		// Hex digits are 0-9 plus a-f; the reference is rewritten in decimal
+		// form because that is what utf8_entity_decode() is handed.
+		$hex = preg_replace_callback('/&#x([a-fA-F0-9]{2,8});/u', function ($m) {
+			return utf8_entity_decode('&#' . hexdec($m[1]) . ';');
+		}, $decimal);
+
+		return $hex === null ? $decimal : $hex;
+	}
 ?>
-- 
GitLab