From d55bd2f2bcb7eb981f54be46dc233ef7266dafd8 Mon Sep 17 00:00:00 2001
From: Nutomic <me@nutomic.com>
Date: Tue, 24 Oct 2023 22:25:52 +0200
Subject: [PATCH] Allow Arabic and Cyrillic usernames/community names (fixes
 #1764) (#4083)

* Allow Arabic and Cyrillic usernames/community names (fixes #1764)

* update comment
---
 Cargo.lock                           |  2 --
 Cargo.toml                           |  2 +-
 api_tests/src/user.spec.ts           | 18 +++++++++++++++
 crates/utils/src/utils/validation.rs | 33 +++++++++++++++++++++++-----
 4 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 378f6cb2e..6d0639eda 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11,8 +11,6 @@ checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
 [[package]]
 name = "activitypub_federation"
 version = "0.5.0-beta.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "509cbafa1b42e01b7ca76c26298814a6638825df4fd67aef2f4c9d36a39c2b6d"
 dependencies = [
  "activitystreams-kinds",
  "actix-web",
diff --git a/Cargo.toml b/Cargo.toml
index a01cc687b..95652b311 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -70,7 +70,7 @@ lemmy_routes = { version = "=0.19.0-rc.3", path = "./crates/routes" }
 lemmy_db_views = { version = "=0.19.0-rc.3", path = "./crates/db_views" }
 lemmy_db_views_actor = { version = "=0.19.0-rc.3", path = "./crates/db_views_actor" }
 lemmy_db_views_moderator = { version = "=0.19.0-rc.3", path = "./crates/db_views_moderator" }
-activitypub_federation = { version = "0.5.0-beta.3", default-features = false, features = [
+activitypub_federation = { git = "https://github.com/LemmyNet/activitypub-federation-rust.git", branch = "webfinger-alphabets", default-features = false, features = [
   "actix-web",
 ] }
 diesel = "2.1.0"
diff --git a/api_tests/src/user.spec.ts b/api_tests/src/user.spec.ts
index eddf568b8..d651af7e1 100644
--- a/api_tests/src/user.spec.ts
+++ b/api_tests/src/user.spec.ts
@@ -129,3 +129,21 @@ test("Requests with invalid auth should be treated as unauthenticated", async ()
   let posts = invalid_auth.getPosts(form);
   expect((await posts).posts).toBeDefined();
 });
+
+test("Create user with Arabic name", async () => {
+  let userRes = await registerUser(alpha, "تجريب");
+  expect(userRes.jwt).toBeDefined();
+  let user = new LemmyHttp(alphaUrl, {
+    headers: { Authorization: `Bearer ${userRes.jwt ?? ""}` },
+  });
+
+  let site = await getSite(user);
+  expect(site.my_user).toBeDefined();
+  if (!site.my_user) {
+    throw "Missing site user";
+  }
+  apShortname = `@${site.my_user.local_user_view.person.name}@lemmy-alpha:8541`;
+
+  let alphaPerson = (await resolvePerson(alpha, apShortname)).person;
+  expect(alphaPerson).toBeDefined();
+});
diff --git a/crates/utils/src/utils/validation.rs b/crates/utils/src/utils/validation.rs
index 46fe9e2d0..36aa2c5f1 100644
--- a/crates/utils/src/utils/validation.rs
+++ b/crates/utils/src/utils/validation.rs
@@ -4,8 +4,6 @@ use once_cell::sync::Lazy;
 use regex::{Regex, RegexBuilder};
 use url::Url;
 
-static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
-  Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
 static VALID_POST_TITLE_REGEX: Lazy<Regex> =
   Lazy::new(|| Regex::new(r".*\S{3,200}.*").expect("compile regex"));
 
@@ -89,10 +87,23 @@ fn has_newline(name: &str) -> bool {
 }
 
 pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> LemmyResult<()> {
-  let check = name.chars().count() <= actor_name_max_length
-    && VALID_ACTOR_NAME_REGEX.is_match(name)
-    && !has_newline(name);
-  if !check {
+  static VALID_ACTOR_NAME_REGEX_EN: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
+  static VALID_ACTOR_NAME_REGEX_AR: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^[\p{Arabic}0-9_]{3,}$").expect("compile regex"));
+  static VALID_ACTOR_NAME_REGEX_RU: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^[\p{Cyrillic}0-9_]{3,}$").expect("compile regex"));
+
+  let check = name.chars().count() <= actor_name_max_length && !has_newline(name);
+
+  // Only allow letters from a single script per name; digits and `_` are accepted alongside any
+  // script. This avoids lookalike characters such as `o`, which looks identical in Latin and
+  // Cyrillic, being used to imitate other users. More scripts can be added the same way.
+  let lang_check = VALID_ACTOR_NAME_REGEX_EN.is_match(name)
+    || VALID_ACTOR_NAME_REGEX_AR.is_match(name)
+    || VALID_ACTOR_NAME_REGEX_RU.is_match(name);
+
+  if !check || !lang_check {
     Err(LemmyErrorType::InvalidName.into())
   } else {
     Ok(())
@@ -329,8 +340,18 @@ mod tests {
     let actor_name_max_length = 20;
     assert!(is_valid_actor_name("Hello_98", actor_name_max_length).is_ok());
     assert!(is_valid_actor_name("ten", actor_name_max_length).is_ok());
+    assert!(is_valid_actor_name("تجريب", actor_name_max_length).is_ok());
+    assert!(is_valid_actor_name("تجريب_123", actor_name_max_length).is_ok());
+    assert!(is_valid_actor_name("Владимир", actor_name_max_length).is_ok());
+
+    // mixed scripts
+    assert!(is_valid_actor_name("تجريب_abc", actor_name_max_length).is_err());
+    assert!(is_valid_actor_name("Влад_abc", actor_name_max_length).is_err());
+    // dash is not an allowed character
     assert!(is_valid_actor_name("Hello-98", actor_name_max_length).is_err());
+    // too short (names need at least 3 characters)
     assert!(is_valid_actor_name("a", actor_name_max_length).is_err());
+    // empty
     assert!(is_valid_actor_name("", actor_name_max_length).is_err());
   }
 
-- 
GitLab