From 36868e648c2fece6a9d75c904368f68136b64b1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= <f@miniflux.net>
Date: Sun, 23 May 2021 20:45:37 -0700
Subject: [PATCH] Add new config option CLEANUP_ARCHIVE_BATCH_SIZE

---
 config/options.go              | 9 +++++++++
 config/parser.go               | 2 ++
 miniflux.1                     | 5 +++++
 service/scheduler/scheduler.go | 7 ++++---
 storage/entry.go               | 8 ++++----
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/config/options.go b/config/options.go
index 2d62fe9d..041c0222 100644
--- a/config/options.go
+++ b/config/options.go
@@ -43,6 +43,7 @@ const (
 	defaultCleanupFrequencyHours              = 24
 	defaultCleanupArchiveReadDays             = 60
 	defaultCleanupArchiveUnreadDays           = 180
+	defaultCleanupArchiveBatchSize            = 10000
 	defaultCleanupRemoveSessionsDays          = 30
 	defaultProxyImages                        = "http-only"
 	defaultFetchYouTubeWatchTime              = false
@@ -101,6 +102,7 @@ type Options struct {
 	cleanupFrequencyHours              int
 	cleanupArchiveReadDays             int
 	cleanupArchiveUnreadDays           int
+	cleanupArchiveBatchSize            int
 	cleanupRemoveSessionsDays          int
 	pollingFrequency                   int
 	batchSize                          int
@@ -160,6 +162,7 @@ func NewOptions() *Options {
 		cleanupFrequencyHours:              defaultCleanupFrequencyHours,
 		cleanupArchiveReadDays:             defaultCleanupArchiveReadDays,
 		cleanupArchiveUnreadDays:           defaultCleanupArchiveUnreadDays,
+		cleanupArchiveBatchSize:            defaultCleanupArchiveBatchSize,
 		cleanupRemoveSessionsDays:          defaultCleanupRemoveSessionsDays,
 		pollingFrequency:                   defaultPollingFrequency,
 		batchSize:                          defaultBatchSize,
@@ -293,6 +296,11 @@ func (o *Options) CleanupArchiveUnreadDays() int {
 	return o.cleanupArchiveUnreadDays
 }
 
+// CleanupArchiveBatchSize returns the number of entries to archive for each interval.
+func (o *Options) CleanupArchiveBatchSize() int {
+	return o.cleanupArchiveBatchSize
+}
+
 // CleanupRemoveSessionsDays returns the number of days after which to remove sessions.
 func (o *Options) CleanupRemoveSessionsDays() int {
 	return o.cleanupRemoveSessionsDays
@@ -488,6 +496,7 @@ func (o *Options) SortedOptions() []*Option {
 		"CERT_FILE":                              o.certFile,
 		"CLEANUP_ARCHIVE_READ_DAYS":              o.cleanupArchiveReadDays,
 		"CLEANUP_ARCHIVE_UNREAD_DAYS":            o.cleanupArchiveUnreadDays,
+		"CLEANUP_ARCHIVE_BATCH_SIZE":             o.cleanupArchiveBatchSize,
 		"CLEANUP_FREQUENCY_HOURS":                o.cleanupFrequencyHours,
 		"CLEANUP_REMOVE_SESSIONS_DAYS":           o.cleanupRemoveSessionsDays,
 		"CREATE_ADMIN":                           o.createAdmin,
diff --git a/config/parser.go b/config/parser.go
index cff06de7..5e62b73a 100644
--- a/config/parser.go
+++ b/config/parser.go
@@ -119,6 +119,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
 			p.opts.cleanupArchiveReadDays = parseInt(value, defaultCleanupArchiveReadDays)
 		case "CLEANUP_ARCHIVE_UNREAD_DAYS":
 			p.opts.cleanupArchiveUnreadDays = parseInt(value, defaultCleanupArchiveUnreadDays)
+		case "CLEANUP_ARCHIVE_BATCH_SIZE":
+			p.opts.cleanupArchiveBatchSize = parseInt(value, defaultCleanupArchiveBatchSize)
 		case "CLEANUP_REMOVE_SESSIONS_DAYS":
 			p.opts.cleanupRemoveSessionsDays = parseInt(value, defaultCleanupRemoveSessionsDays)
 		case "WORKER_POOL_SIZE":
diff --git a/miniflux.1 b/miniflux.1
index ab1c2696..eb7db1c2 100644
--- a/miniflux.1
+++ b/miniflux.1
@@ -223,6 +223,11 @@ Set to -1 to keep all unread entries.
 .br
 Default is 180 days\&.
 .TP
+.B CLEANUP_ARCHIVE_BATCH_SIZE
+Number of entries to archive for each job interval\&.
+.br
+Default is 10000 entries\&.
+.TP
 .B CLEANUP_REMOVE_SESSIONS_DAYS
 Number of days after removing old sessions from the database\&.
 .br
diff --git a/service/scheduler/scheduler.go b/service/scheduler/scheduler.go
index 6ec55279..c8dd1322 100644
--- a/service/scheduler/scheduler.go
+++ b/service/scheduler/scheduler.go
@@ -31,6 +31,7 @@ func Serve(store *storage.Storage, pool *worker.Pool) {
 		config.Opts.CleanupFrequencyHours(),
 		config.Opts.CleanupArchiveReadDays(),
 		config.Opts.CleanupArchiveUnreadDays(),
+		config.Opts.CleanupArchiveBatchSize(),
 		config.Opts.CleanupRemoveSessionsDays(),
 	)
 }
@@ -47,14 +48,14 @@ func feedScheduler(store *storage.Storage, pool *worker.Pool, frequency, batchSi
 	}
 }
 
-func cleanupScheduler(store *storage.Storage, frequency, archiveReadDays, archiveUnreadDays, sessionsDays int) {
+func cleanupScheduler(store *storage.Storage, frequency, archiveReadDays, archiveUnreadDays, archiveBatchSize, sessionsDays int) {
 	for range time.Tick(time.Duration(frequency) * time.Hour) {
 		nbSessions := store.CleanOldSessions(sessionsDays)
 		nbUserSessions := store.CleanOldUserSessions(sessionsDays)
 		logger.Info("[Scheduler:Cleanup] Cleaned %d sessions and %d user sessions", nbSessions, nbUserSessions)
 
 		startTime := time.Now()
-		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusRead, archiveReadDays); err != nil {
+		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusRead, archiveReadDays, archiveBatchSize); err != nil {
 			logger.Error("[Scheduler:ArchiveReadEntries] %v", err)
 		} else {
 			logger.Info("[Scheduler:ArchiveReadEntries] %d entries changed", rowsAffected)
@@ -65,7 +66,7 @@ func cleanupScheduler(store *storage.Storage, frequency, archiveReadDays, archiv
 		}
 
 		startTime = time.Now()
-		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusUnread, archiveUnreadDays); err != nil {
+		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusUnread, archiveUnreadDays, archiveBatchSize); err != nil {
 			logger.Error("[Scheduler:ArchiveUnreadEntries] %v", err)
 		} else {
 			logger.Info("[Scheduler:ArchiveUnreadEntries] %d entries changed", rowsAffected)
diff --git a/storage/entry.go b/storage/entry.go
index 6e50aee7..968877f2 100644
--- a/storage/entry.go
+++ b/storage/entry.go
@@ -299,8 +299,8 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries
 }
 
 // ArchiveEntries changes the status of entries to "removed" after the given number of days.
-func (s *Storage) ArchiveEntries(status string, days int) (int64, error) {
-	if days < 0 {
+func (s *Storage) ArchiveEntries(status string, days, limit int) (int64, error) {
+	if days < 0 || limit <= 0 {
 		return 0, nil
 	}
 
@@ -310,10 +310,10 @@ func (s *Storage) ArchiveEntries(status string, days int) (int64, error) {
 		SET
 			status='removed'
 		WHERE
-			id=ANY(SELECT id FROM entries WHERE status=$1 AND starred is false AND share_code='' AND created_at < now () - '%d days'::interval ORDER BY created_at ASC LIMIT 5000)
+			id=ANY(SELECT id FROM entries WHERE status=$1 AND starred is false AND share_code='' AND created_at < now () - '%d days'::interval ORDER BY created_at ASC LIMIT %d)
 	`
 
-	result, err := s.db.Exec(fmt.Sprintf(query, days), status)
+	result, err := s.db.Exec(fmt.Sprintf(query, days, limit), status)
 	if err != nil {
 		return 0, fmt.Errorf(`store: unable to archive %s entries: %v`, status, err)
 	}
-- 
GitLab