diff --git a/config.sample.yaml b/config.sample.yaml index 70a8b7c4bc2d736305624347c6969a94cceb6f47..9778489291053e8ee6e26962edb422e24b0d38ac 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -90,8 +90,15 @@ downloads: urlPreviews: enabled: true # If enabled, the preview_url routes will be accessible maxPageSizeBytes: 10485760 # 10MB default, 0 to disable + + # Note: URL preview descriptions are first limited to a maximum number of words and then to a maximum + # number of characters, dropping trailing words as needed to fit. The same applies to the title. + numWords: 30 # The maximum number of words to include in a preview's description + maxLength: 200 # The maximum number of characters for a description + numTitleWords: 30 # The maximum number of words to include in a preview's title + maxTitleLength: 100 # The maximum number of characters for a title # The number of workers to use when generating url previews. Raise this number if url # previews are slow or timing out. diff --git a/src/github.com/turt2live/matrix-media-repo/config/config.go b/src/github.com/turt2live/matrix-media-repo/config/config.go index edeea26f1f37c69a77eba85108fc8d041d12244f..9b35eecdcaac077e281c0590c9312af2801e034d 100644 --- a/src/github.com/turt2live/matrix-media-repo/config/config.go +++ b/src/github.com/turt2live/matrix-media-repo/config/config.go @@ -63,6 +63,8 @@ type UrlPreviewsConfig struct { Enabled bool `yaml:"enabled"` NumWords int `yaml:"numWords"` NumTitleWords int `yaml:"numTitleWords"` + MaxLength int `yaml:"maxLength"` + MaxTitleLength int `yaml:"maxTitleLength"` MaxPageSizeBytes int64 `yaml:"maxPageSizeBytes"` NumWorkers int `yaml:"numWorkers"` DisallowedNetworks []string `yaml:"disallowedNetworks,flow"` @@ -206,6 +208,8 @@ func NewDefaultConfig() *MediaRepoConfig { Enabled: true, NumWords: 30, NumTitleWords: 30, + MaxLength: 200, + MaxTitleLength: 100, MaxPageSizeBytes: 10485760, // 10mb NumWorkers: 10, DisallowedNetworks: []string{ diff --git 
a/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go b/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go index a397395da86dd1e8ec9e49b8e5cb94429632b7dd..b0daae91a21b0e8272f72b34d46b379122fc341c 100644 --- a/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go +++ b/src/github.com/turt2live/matrix-media-repo/services/url_service/opengraph_previewer.go @@ -72,8 +72,8 @@ func (p *openGraphUrlPreviewer) GeneratePreview(urlStr string) (openGraphResult, } // Be sure to trim the title and description - og.Title = summarize(og.Title, config.Get().UrlPreviews.NumTitleWords) - og.Description = summarize(og.Description, config.Get().UrlPreviews.NumWords) + og.Title = summarize(og.Title, config.Get().UrlPreviews.NumTitleWords, config.Get().UrlPreviews.MaxTitleLength) + og.Description = summarize(og.Description, config.Get().UrlPreviews.NumWords, config.Get().UrlPreviews.MaxLength) graph := &openGraphResult{ Type: og.Type, @@ -264,7 +264,7 @@ func calcImages(html string) []*opengraph.Image { return []*opengraph.Image{&img} } -func summarize(text string, maxWords int) (string) { +func summarize(text string, maxWords int, maxLength int) (string) { // Normalize the whitespace to be something useful (crush it to one giant line) surroundingWhitespace := regexp.MustCompile(`^[\s\p{Zs}]+|[\s\p{Zs}]+$`) interiorWhitespace := regexp.MustCompile(`[\s\p{Zs}]{2,}`) @@ -274,8 +274,28 @@ func summarize(text string, maxWords int) (string) { text = newlines.ReplaceAllString(text, " ") words := strings.Split(text, " ") - if len(words) < maxWords { - return text + result := text + if len(words) >= maxWords { + result = strings.Join(words[:maxWords], " ") } - return strings.Join(words[:maxWords], " ") + + if len(result) > maxLength { + // First try trimming off the last word + words = strings.Split(result, " ") + newResult := words[0] + for _, word := range words { + if len(newResult+" 
"+word) > maxLength { + break + } + newResult = newResult + " " + word + } + result = newResult + } + + if len(result) > maxLength { + // It's still too long, just trim the thing and add an ellipsis + result = result[:maxLength] + "..." + } + + return result }