From 2d4037ba619ad923c00261b9cd05597c9c836569 Mon Sep 17 00:00:00 2001
From: Kroese <kroese@users.noreply.github.com>
Date: Mon, 11 Dec 2023 11:24:51 +0100
Subject: [PATCH] Extend sitemap span (#4231)

* Extend sitemap span

* Keep cargo_fmt happy

* Add FETCH_LIMIT_SITEMAP

* Use FETCH_LIMIT_SITEMAP

* Keep cargo_fmt happy

* Update utils.rs

* Use SITEMAP_DAYS

* Keep cargo_fmt happy

* Sitemap

* Keep cargo_fmt happy

* Sitemap

* Sitemap

* Increase to 31 days
---
 crates/api/src/sitemap.rs          |  4 ++--
 crates/db_schema/src/impls/post.rs | 13 +++++++++++--
 crates/db_schema/src/utils.rs      |  2 ++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/crates/api/src/sitemap.rs b/crates/api/src/sitemap.rs
index 438a8b8e8..b1b961350 100644
--- a/crates/api/src/sitemap.rs
+++ b/crates/api/src/sitemap.rs
@@ -26,7 +26,7 @@ async fn generate_urlset(
 }
 
 pub async fn get_sitemap(context: Data<LemmyContext>) -> LemmyResult<HttpResponse> {
-  info!("Generating sitemap with posts from last {} hours...", 24);
+  info!("Generating sitemap...",);
   let posts = Post::list_for_sitemap(&mut context.pool()).await?;
   info!("Loaded latest {} posts", posts.len());
 
@@ -36,7 +36,7 @@ pub async fn get_sitemap(context: Data<LemmyContext>) -> LemmyResult<HttpRespons
   Ok(
     HttpResponse::Ok()
       .content_type("application/xml")
-      .insert_header(header::CacheControl(vec![CacheDirective::MaxAge(86_400)])) // 24 h
+      .insert_header(header::CacheControl(vec![CacheDirective::MaxAge(3_600)])) // 1 h
       .body(buf),
   )
 }
diff --git a/crates/db_schema/src/impls/post.rs b/crates/db_schema/src/impls/post.rs
index f7ff633ec..d65fd2398 100644
--- a/crates/db_schema/src/impls/post.rs
+++ b/crates/db_schema/src/impls/post.rs
@@ -29,7 +29,15 @@ use crate::{
     PostUpdateForm,
   },
   traits::{Crud, Likeable, Saveable},
-  utils::{get_conn, naive_now, DbPool, DELETED_REPLACEMENT_TEXT, FETCH_LIMIT_MAX},
+  utils::{
+    get_conn,
+    naive_now,
+    DbPool,
+    DELETED_REPLACEMENT_TEXT,
+    FETCH_LIMIT_MAX,
+    SITEMAP_DAYS,
+    SITEMAP_LIMIT,
+  },
 };
 use ::url::Url;
 use chrono::{Duration, Utc};
@@ -109,8 +117,9 @@ impl Post {
       .filter(local.eq(true))
       .filter(deleted.eq(false))
       .filter(removed.eq(false))
-      .filter(published.ge(Utc::now().naive_utc() - Duration::days(1)))
+      .filter(published.ge(Utc::now().naive_utc() - Duration::days(SITEMAP_DAYS)))
       .order(published.desc())
+      .limit(SITEMAP_LIMIT)
       .load::<(DbUrl, chrono::DateTime<Utc>)>(conn)
       .await
   }
diff --git a/crates/db_schema/src/utils.rs b/crates/db_schema/src/utils.rs
index 9ac2d4c88..2b1179bee 100644
--- a/crates/db_schema/src/utils.rs
+++ b/crates/db_schema/src/utils.rs
@@ -49,6 +49,8 @@ use url::Url;
 
 const FETCH_LIMIT_DEFAULT: i64 = 10;
 pub const FETCH_LIMIT_MAX: i64 = 50;
+pub const SITEMAP_LIMIT: i64 = 50000;
+pub const SITEMAP_DAYS: i64 = 31;
 const POOL_TIMEOUT: Option<Duration> = Some(Duration::from_secs(5));
 pub const RANK_DEFAULT: f64 = 0.0001;
 
-- 
GitLab