diff --git a/classes/api.php b/classes/api.php index 15576c7c0dbf0d443732eeaa450f0528bed5e754..e3dce0c174078c61ed0593c8466f2eb5905f2a93 100644 --- a/classes/api.php +++ b/classes/api.php @@ -280,7 +280,7 @@ class API extends Handler { $article_id = join(",", array_filter(explode(",", db_escape_string($_REQUEST["article_id"])), is_numeric)); - $query = "SELECT id,title,link,content,feed_id,comments,int_id, + $query = "SELECT id,title,link,content,cached_content,feed_id,comments,int_id, marked,unread,published, ".SUBSTRING_FOR_DATE."(updated,1,16) as updated, author @@ -309,7 +309,7 @@ class API extends Handler { "comments" => $line["comments"], "author" => $line["author"], "updated" => strtotime($line["updated"]), - "content" => $line["content"], + "content" => $line["cached_content"] != "" ? $line["cached_content"] : $line["content"], "feed_id" => $line["feed_id"], "attachments" => $attachments ); diff --git a/classes/feeds.php b/classes/feeds.php index 9a74130b0e21841e35883f7ab2a8b0b29129845a..49adf38795b8381d0182ab43d08e383a051971f7 100644 --- a/classes/feeds.php +++ b/classes/feeds.php @@ -158,16 +158,20 @@ class Feeds extends Handler_Protected { // Update the feed if required with some basic flood control $result = db_query($this->link, - "SELECT cache_images,".SUBSTRING_FOR_DATE."(last_updated,1,19) AS last_updated + "SELECT cache_images,cache_content,".SUBSTRING_FOR_DATE."(last_updated,1,19) AS last_updated FROM ttrss_feeds WHERE id = '$feed'"); if (db_num_rows($result) != 0) { $last_updated = strtotime(db_fetch_result($result, 0, "last_updated")); $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images")); + $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content")); - if (!$cache_images && time() - $last_updated > 120 || isset($_REQUEST['DevForceUpdate'])) { + if (!$cache_images && !$cache_content && time() - $last_updated > 120 || isset($_REQUEST['DevForceUpdate'])) { include "rssfuncs.php"; 
update_rss_feed($this->link, $feed, true, true); + } else { + db_query($this->link, "UPDATE ttrss_feeds SET last_updated = '1970-01-01', last_update_started = '1970-01-01' + WHERE id = '$feed'"); } } } @@ -234,6 +238,7 @@ class Feeds extends Handler_Protected { $feed_title = $qfh_ret[1]; $feed_site_url = $qfh_ret[2]; $last_error = $qfh_ret[3]; + $cache_content = true; $vgroup_last_feed = $vgr_last_feed; @@ -627,6 +632,10 @@ class Feeds extends Handler_Protected { $feed_site_url = $line["site_url"]; + if ($cache_content && $line["cached_content"] != "") { + $line["content_preview"] =& $line["cached_content"]; + } + $article_content = sanitize($this->link, $line["content_preview"], false, false, $feed_site_url); diff --git a/classes/pref/feeds.php b/classes/pref/feeds.php index 4de3576c88db2e9f412ff80e1d119b053cc7dd6c..c51174b07647d5d392f0200c48c5df147397bfdb 100644 --- a/classes/pref/feeds.php +++ b/classes/pref/feeds.php @@ -653,6 +653,19 @@ class Pref_Feeds extends Handler_Protected { $checked> <label for=\"cache_images\">". __('Cache images locally')."</label>"; + $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content")); + + if ($cache_content) { + $checked = "checked=\"1\""; + } else { + $checked = ""; + } + + print "<hr/><input dojoType=\"dijit.form.CheckBox\" type=\"checkbox\" id=\"cache_content\" + name=\"cache_content\" + $checked> <label for=\"cache_content\">". 
+ __('Cache content locally')."</label>"; + $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update")); if ($mark_unread_on_update) { @@ -914,6 +927,8 @@ class Pref_Feeds extends Handler_Protected { db_escape_string($_POST["include_in_digest"])); $cache_images = checkbox_to_sql_bool( db_escape_string($_POST["cache_images"])); + $cache_content = checkbox_to_sql_bool( + db_escape_string($_POST["cache_content"])); $update_method = (int) db_escape_string($_POST["update_method"]); $always_display_enclosures = checkbox_to_sql_bool( @@ -938,8 +953,6 @@ class Pref_Feeds extends Handler_Protected { $category_qpart_nocomma = ""; } - $cache_images_qpart = "cache_images = $cache_images,"; - if (!$batch) { $result = db_query($this->link, "UPDATE ttrss_feeds SET @@ -951,7 +964,8 @@ class Pref_Feeds extends Handler_Protected { auth_pass = '$auth_pass', private = $private, rtl_content = $rtl_content, - $cache_images_qpart + cache_images = $cache_images, + cache_content = $cache_content, include_in_digest = $include_in_digest, always_display_enclosures = $always_display_enclosures, mark_unread_on_update = $mark_unread_on_update, @@ -1023,6 +1037,10 @@ class Pref_Feeds extends Handler_Protected { $qpart = "cache_images = $cache_images"; break; + case "cache_content": + $qpart = "cache_content = $cache_content"; + break; + case "rtl_content": $qpart = "rtl_content = $rtl_content"; break; diff --git a/db-updater.php b/db-updater.php index e0900828ce4d47bdd0ae99f63e48a747e316ac3e..216986f39e79413d9a60a8c9dd2c0951fd585049 100644 --- a/db-updater.php +++ b/db-updater.php @@ -1,5 +1,5 @@ <?php - set_include_path(get_include_path() . PATH_SEPARATOR . + set_include_path(get_include_path() . PATH_SEPARATOR . dirname(__FILE__) . 
"/include"); require_once "functions.php"; @@ -125,6 +125,7 @@ function confirmOP() { foreach (array_keys($update_versions) as $v) { if ($v == $version + 1) { print "<p>".T_sprintf("Updating to version %d...", $v)."</p>"; + db_query($link, "BEGIN"); $fp = fopen($update_versions[$v], "r"); if ($fp) { while (!feof($fp)) { @@ -136,6 +137,7 @@ function confirmOP() { } } fclose($fp); + db_query($link, "COMMIT"); print "<p>".__("Checking version... "); diff --git a/include/functions.php b/include/functions.php index 821e314f6ce4b4555f83b856df36cd4362857d33..55333ccd667e265649c6e2738234a39755dc3ac6 100644 --- a/include/functions.php +++ b/include/functions.php @@ -1,6 +1,6 @@ <?php define('EXPECTED_CONFIG_VERSION', 26); - define('SCHEMA_VERSION', 98); + define('SCHEMA_VERSION', 99); $fetch_last_error = false; @@ -2367,7 +2367,7 @@ } } - $content_query_part = "content as content_preview,"; + $content_query_part = "content as content_preview, cached_content, "; if (is_numeric($feed)) { @@ -3111,15 +3111,17 @@ //if (!$zoom_mode) { print "<article id='$id'><![CDATA["; }; - $result = db_query($link, "SELECT rtl_content, always_display_enclosures FROM ttrss_feeds + $result = db_query($link, "SELECT rtl_content, always_display_enclosures, cache_content FROM ttrss_feeds WHERE id = '$feed_id' AND owner_uid = $owner_uid"); if (db_num_rows($result) == 1) { $rtl_content = sql_bool_to_bool(db_fetch_result($result, 0, "rtl_content")); $always_display_enclosures = sql_bool_to_bool(db_fetch_result($result, 0, "always_display_enclosures")); + $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content")); } else { $rtl_content = false; $always_display_enclosures = false; + $cache_content = false; } if ($rtl_content) { @@ -3146,7 +3148,8 @@ tag_cache, author, orig_feed_id, - note + note, + cached_content FROM ttrss_entries,ttrss_user_entries WHERE id = '$id' AND ref_id = id AND owner_uid = $owner_uid"); @@ -3344,6 +3347,10 @@ } } + if ($cache_content && 
$line["cached_content"] != "") { + $line["content"] =& $line["cached_content"]; + } + $article_content = sanitize($link, $line["content"], false, $owner_uid, $feed_site_url); @@ -4460,6 +4467,11 @@ } if ($show_content) { + + if ($line["cached_content"] != "") { + $line["content_preview"] =& $line["cached_content"]; + } + if ($sanitize_content) { $headline_row["content"] = sanitize($link, $line["content_preview"], false, false, $line["site_url"]); diff --git a/include/rssfuncs.php b/include/rssfuncs.php index d1e9e6e01612ab7398f02a7426951cd8b81c5747..b26495f67623671833c48804328a4b0f21337d4e 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -157,6 +157,7 @@ } // function update_daemon_common + // ignore_daemon is not used function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false, $override_url = false) { @@ -166,35 +167,15 @@ $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']; - if (!$_REQUEST["daemon"] && !$ignore_daemon) { - return false; - } - if ($debug_enabled) { _debug("update_rss_feed: start"); } - if (!$ignore_daemon) { - - if (DB_TYPE == "pgsql") { - $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')"; - } else { - $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))"; - } - - $result = db_query($link, "SELECT id,update_interval,auth_login, - auth_pass,cache_images,update_method,last_updated - FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart"); - - } else { - - $result = db_query($link, "SELECT id,update_interval,auth_login, - feed_url,auth_pass,cache_images,update_method,last_updated, - mark_unread_on_update, owner_uid, update_on_checksum_change, - pubsub_state - FROM ttrss_feeds WHERE id = '$feed'"); - - } + $result = db_query($link, "SELECT id,update_interval,auth_login, + 
feed_url,auth_pass,cache_images,update_method,last_updated,cache_content, + mark_unread_on_update, owner_uid, update_on_checksum_change, + pubsub_state + FROM ttrss_feeds WHERE id = '$feed'"); if (db_num_rows($result) == 0) { if ($debug_enabled) { @@ -240,6 +221,7 @@ } $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images")); + $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content")); $fetch_url = db_fetch_result($result, 0, "feed_url"); $feed = db_escape_string($feed); @@ -624,6 +606,7 @@ } $entry_content_unescaped = $entry_content; + $entry_cached_content = ""; if ($use_simplepie) { $entry_comments = strip_tags($item->data["comments"]); @@ -782,6 +765,20 @@ _debug("update_rss_feed: base guid not found"); } + if ($cache_content) { + if ($debug_enabled) { + _debug("update_rss_feed: caching content..."); + } + + $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass); + + if ($cache_images && is_writable(CACHE_DIR . '/images')) + $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled); + + $entry_cached_content = db_escape_string($entry_cached_content, false); + + } + // base post entry does not exist, create it $result = db_query($link, @@ -792,6 +789,7 @@ updated, content, content_hash, + cached_content, no_orig_date, date_updated, date_entered, @@ -804,6 +802,7 @@ '$entry_link', '$entry_timestamp_fmt', '$entry_content', '$content_hash', + '$entry_cached_content', $no_orig_date, NOW(), @@ -996,6 +995,19 @@ if ($content_hash != $orig_content_hash) { $post_needs_update = true; $update_insignificant = false; + + if ($cache_content) { + if ($debug_enabled) { + _debug("update_rss_feed: caching content because original checksum changed..."); + } + + $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass); + + if ($cache_images && is_writable(CACHE_DIR . 
'/images')) + $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled); + + $entry_cached_content = db_escape_string($entry_cached_content, false); + } } if (db_escape_string($orig_title) != $entry_title) { @@ -1016,6 +1028,7 @@ db_query($link, "UPDATE ttrss_entries SET title = '$entry_title', content = '$entry_content', content_hash = '$content_hash', + cached_content = '$entry_cached_content', updated = '$entry_timestamp_fmt', num_comments = '$num_comments' WHERE id = '$ref_id'"); @@ -1484,4 +1497,25 @@ } } } + + /* Fetches $url with the given credentials and returns the page's body element serialized via saveXML, or "" when nothing was fetched or no body element exists; $link is not used here. */ + function cache_content($link, $url, $login, $pass) { + + $content = fetch_file_contents($url, $login, $pass); + + if ($content) { + $doc = new DOMDocument(); + @$doc->loadHTML($content); + + $node = $doc->getElementsByTagName('body')->item(0); + + if ($node) { + $content = $doc->saveXML($node, LIBXML_NOEMPTYTAG); + + return $content; + } + } + + return ""; + } ?> diff --git a/schema/ttrss_schema_mysql.sql b/schema/ttrss_schema_mysql.sql index 4b2a3369db23cf772b9ee7ca0860870a0c6111f9..2877894260efd9a54e1eb89d43869652be7a7246 100644 --- a/schema/ttrss_schema_mysql.sql +++ b/schema/ttrss_schema_mysql.sql @@ -116,6 +116,7 @@ create table ttrss_feeds (id integer not null auto_increment primary key, hidden bool not null default false, include_in_digest boolean not null default true, cache_images boolean not null default false, + cache_content boolean not null default false, auth_pass_encrypted boolean not null default false, last_viewed datetime default null, last_update_started datetime default null, @@ -150,6 +151,7 @@ create table ttrss_entries (id integer not null primary key auto_increment, updated datetime not null, content longtext not null, content_hash varchar(250) not null, + cached_content longtext, no_orig_date bool not null default 0, date_entered datetime not null, date_updated datetime not null, @@ -306,7 +308,7 @@ create table ttrss_tags (id integer primary key auto_increment, create 
table ttrss_version (schema_version int not null) ENGINE=InnoDB DEFAULT CHARSET=UTF8; -insert into ttrss_version values (98); +insert into ttrss_version values (99); create table ttrss_enclosures (id integer primary key auto_increment, content_url text not null, diff --git a/schema/ttrss_schema_pgsql.sql b/schema/ttrss_schema_pgsql.sql index a8a769315c81aa4355491ea23e0319ac18e2bc7b..432ebf88dc9c0a8a72df86c2ce93559af77133f5 100644 --- a/schema/ttrss_schema_pgsql.sql +++ b/schema/ttrss_schema_pgsql.sql @@ -78,6 +78,7 @@ create table ttrss_feeds (id serial not null primary key, include_in_digest boolean not null default true, rtl_content boolean not null default false, cache_images boolean not null default false, + cache_content boolean not null default false, last_viewed timestamp default null, last_update_started timestamp default null, update_method integer not null default 0, @@ -130,6 +131,7 @@ create table ttrss_entries (id serial not null primary key, updated timestamp not null, content text not null, content_hash varchar(250) not null, + cached_content text, no_orig_date boolean not null default false, date_entered timestamp not null, date_updated timestamp not null, @@ -254,7 +256,7 @@ create index ttrss_tags_post_int_id_idx on ttrss_tags(post_int_id); create table ttrss_version (schema_version int not null); -insert into ttrss_version values (98); +insert into ttrss_version values (99); create table ttrss_enclosures (id serial not null primary key, content_url text not null, diff --git a/schema/versions/mysql/99.sql b/schema/versions/mysql/99.sql new file mode 100644 index 0000000000000000000000000000000000000000..d7f9e02307f2271fbabb4e5f6eefd2f79e46fc17 --- /dev/null +++ b/schema/versions/mysql/99.sql @@ -0,0 +1,12 @@ +begin; + +alter table ttrss_feeds add column cache_content bool; +update ttrss_feeds set cache_content = false; +alter table ttrss_feeds change cache_content cache_content bool not null; +alter table ttrss_feeds alter column cache_content set 
default false; + +alter table ttrss_entries add column cached_content longtext; + +update ttrss_version set schema_version = 99; + +commit; diff --git a/schema/versions/pgsql/99.sql b/schema/versions/pgsql/99.sql new file mode 100644 index 0000000000000000000000000000000000000000..846056cd618200cd8bad4ce66b13eb7aa8eb8558 --- /dev/null +++ b/schema/versions/pgsql/99.sql @@ -0,0 +1,12 @@ +begin; + +alter table ttrss_feeds add column cache_content boolean; +update ttrss_feeds set cache_content = false; +alter table ttrss_feeds alter column cache_content set not null; +alter table ttrss_feeds alter column cache_content set default false; + +alter table ttrss_entries add column cached_content text; + +update ttrss_version set schema_version = 99; + +commit;