From 21b4c031983f2b102c79b33b2d64f575e10912db Mon Sep 17 00:00:00 2001 From: Travis Ralston <travpc@gmail.com> Date: Mon, 29 Jan 2018 20:40:10 -0700 Subject: [PATCH] Limit the URL preview title length by words Fixes #62 --- config.sample.yaml | 1 + .../turt2live/matrix-media-repo/config/config.go | 2 ++ .../services/url_service/opengraph_previewer.go | 9 ++++++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/config.sample.yaml b/config.sample.yaml index 6cb6258b..70a8b7c4 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -91,6 +91,7 @@ urlPreviews: enabled: true # If enabled, the preview_url routes will be accessible maxPageSizeBytes: 10485760 # 10MB default, 0 to disable numWords: 30 # The number of words to include in a preview (maximum) + numTitleWords: 30 # The maximum number of words to include in a preview's title # The number of workers to use when generating url previews. Raise this number if url # previews are slow or timing out. diff --git a/src/github.com/turt2live/matrix-media-repo/config/config.go b/src/github.com/turt2live/matrix-media-repo/config/config.go index 69207837..edeea26f 100644 --- a/src/github.com/turt2live/matrix-media-repo/config/config.go +++ b/src/github.com/turt2live/matrix-media-repo/config/config.go @@ -62,6 +62,7 @@ type ThumbnailSize struct { type UrlPreviewsConfig struct { Enabled bool `yaml:"enabled"` NumWords int `yaml:"numWords"` + NumTitleWords int `yaml:"numTitleWords"` MaxPageSizeBytes int64 `yaml:"maxPageSizeBytes"` NumWorkers int `yaml:"numWorkers"` DisallowedNetworks []string `yaml:"disallowedNetworks,flow"` @@ -204,6 +205,7 @@ func NewDefaultConfig() *MediaRepoConfig { UrlPreviews: &UrlPreviewsConfig{ Enabled: true, NumWords: 30, + NumTitleWords: 30, MaxPageSizeBytes: 10485760, // 10mb NumWorkers: 10, DisallowedNetworks: []string{ diff --git a/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go b/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go index 1cab61ca..a397395d 100644 --- a/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go +++ b/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go @@ -71,11 +71,15 @@ func (p *openGraphUrlPreviewer) GeneratePreview(urlStr string) (openGraphResult, og.Images = calcImages(html) } + // Be sure to trim the title and description + og.Title = summarize(og.Title, config.Get().UrlPreviews.NumTitleWords) + og.Description = summarize(og.Description, config.Get().UrlPreviews.NumWords) + graph := &openGraphResult{ Type: og.Type, Url: og.URL, Title: og.Title, - Description: summarize(og.Description), + Description: og.Description, SiteName: og.SiteName, } @@ -260,7 +264,7 @@ func calcImages(html string) []*opengraph.Image { return []*opengraph.Image{&img} } -func summarize(text string) (string) { +func summarize(text string, maxWords int) (string) { // Normalize the whitespace to be something useful (crush it to one giant line) surroundingWhitespace := regexp.MustCompile(`^[\s\p{Zs}]+|[\s\p{Zs}]+$`) interiorWhitespace := regexp.MustCompile(`[\s\p{Zs}]{2,}`) @@ -269,7 +273,6 @@ func summarize(text string) (string) { text = interiorWhitespace.ReplaceAllString(text, " ") text = newlines.ReplaceAllString(text, " ") - maxWords := config.Get().UrlPreviews.NumWords words := strings.Split(text, " ") if len(words) < maxWords { return text -- GitLab