From 26bb5220279630b97a91ff5237d332af3673145b Mon Sep 17 00:00:00 2001
From: Travis Ralston <travpc@gmail.com>
Date: Tue, 4 Jul 2023 22:43:35 -0600
Subject: [PATCH] Re-shift some of the previewers

---
 .../preview_controller/preview_controller.go  |  4 +-
 .../preview_resource_handler.go               |  4 +-
 pipelines/_steps/url_preview/upload_image.go  |  7 +--
 pipelines/pipeline_preview/pipeline.go        |  4 +-
 {url_previewers => url_previewing}/acl.go     |  2 +-
 .../calculated_previewer.go                   | 26 ++++------
 {url_previewers => url_previewing}/http.go    | 51 +++++++++----------
 .../oembed_previewer.go                       | 10 ++--
 .../opengraph_previewer.go                    | 12 ++---
 {url_previewers => url_previewing}/types.go   | 16 +++---
 {url_previewers => url_previewing}/util.go    |  2 +-
 11 files changed, 65 insertions(+), 73 deletions(-)
 rename {url_previewers => url_previewing}/acl.go (99%)
 rename {url_previewers => url_previewing}/calculated_previewer.go (66%)
 rename {url_previewers => url_previewing}/http.go (82%)
 rename {url_previewers => url_previewing}/oembed_previewer.go (92%)
 rename {url_previewers => url_previewing}/opengraph_previewer.go (94%)
 rename {url_previewers => url_previewing}/types.go (53%)
 rename {url_previewers => url_previewing}/util.go (97%)

diff --git a/controllers/preview_controller/preview_controller.go b/controllers/preview_controller/preview_controller.go
index 1a58ee83..a4e6b6a4 100644
--- a/controllers/preview_controller/preview_controller.go
+++ b/controllers/preview_controller/preview_controller.go
@@ -13,7 +13,7 @@ import (
 	"github.com/turt2live/matrix-media-repo/storage"
 	"github.com/turt2live/matrix-media-repo/storage/stores"
 	"github.com/turt2live/matrix-media-repo/types"
-	"github.com/turt2live/matrix-media-repo/url_previewers"
+	"github.com/turt2live/matrix-media-repo/url_previewing"
 	"github.com/turt2live/matrix-media-repo/util"
 )
 
@@ -55,7 +55,7 @@ func GetPreview(urlStr string, onHost string, forUserId string, atTs int64, lang
 			return nil, common.ErrInvalidHost
 		}
 		parsedUrl.Fragment = "" // Remove fragment because it's not important for servers
-		urlToPreview := &url_previewers.UrlPayload{
+		urlToPreview := &url_previewing.UrlPayload{
 			UrlString: urlStr,
 			ParsedUrl: parsedUrl,
 		}
diff --git a/controllers/preview_controller/preview_resource_handler.go b/controllers/preview_controller/preview_resource_handler.go
index b45dfd03..0c7c5ac0 100644
--- a/controllers/preview_controller/preview_resource_handler.go
+++ b/controllers/preview_controller/preview_resource_handler.go
@@ -5,7 +5,7 @@ import (
 	"sync"
 
 	"github.com/getsentry/sentry-go"
-	url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewers"
+	url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewing"
 	"github.com/turt2live/matrix-media-repo/util/stream_util"
 
 	"github.com/disintegration/imaging"
@@ -80,7 +80,7 @@ func urlPreviewWorkFn(request *resource_handler.WorkRequest) (resp *urlPreviewRe
 
 	db := storage.GetDatabase().GetUrlStore(ctx)
 
-	var preview url_previewers2.PreviewResult
+	var preview url_previewers2.Result
 	err := url_previewers2.ErrPreviewUnsupported
 
 	// Try oEmbed first
diff --git a/pipelines/_steps/url_preview/upload_image.go b/pipelines/_steps/url_preview/upload_image.go
index 26dd92e5..791d3812 100644
--- a/pipelines/_steps/url_preview/upload_image.go
+++ b/pipelines/_steps/url_preview/upload_image.go
@@ -9,15 +9,16 @@ import (
 	"github.com/turt2live/matrix-media-repo/datastores"
 	"github.com/turt2live/matrix-media-repo/pipelines/pipeline_upload"
 	"github.com/turt2live/matrix-media-repo/thumbnailing"
-	"github.com/turt2live/matrix-media-repo/url_previewers"
+	"github.com/turt2live/matrix-media-repo/url_previewing"
 	"github.com/turt2live/matrix-media-repo/util"
 )
 
-func UploadImage(ctx rcontext.RequestContext, image *url_previewers.PreviewImage, onHost string, userId string, forRecord *database.DbUrlPreview) {
-	if image == nil {
+func UploadImage(ctx rcontext.RequestContext, image *url_previewing.Image, onHost string, userId string, forRecord *database.DbUrlPreview) {
+	if image == nil || image.Data == nil {
 		return
 	}
 
+	defer image.Data.Close()
 	pr, pw := io.Pipe()
 	tee := io.TeeReader(image.Data, pw)
 	mediaChan := make(chan *database.DbMedia)
diff --git a/pipelines/pipeline_preview/pipeline.go b/pipelines/pipeline_preview/pipeline.go
index 9f1156a9..bd5312ee 100644
--- a/pipelines/pipeline_preview/pipeline.go
+++ b/pipelines/pipeline_preview/pipeline.go
@@ -10,7 +10,7 @@ import (
 	"github.com/turt2live/matrix-media-repo/common/rcontext"
 	"github.com/turt2live/matrix-media-repo/database"
 	"github.com/turt2live/matrix-media-repo/pipelines/_steps/url_preview"
-	url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewers"
+	url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewing"
 	"github.com/turt2live/matrix-media-repo/util"
 	"golang.org/x/sync/singleflight"
 )
@@ -56,7 +56,7 @@ func Execute(ctx rcontext.RequestContext, onHost string, previewUrl string, user
 			UrlString: previewUrl,
 			ParsedUrl: parsedUrl,
 		}
-		var preview url_previewers2.PreviewResult
+		var preview url_previewers2.Result
 		err = url_previewers2.ErrPreviewUnsupported
 
 		// Step 5: Try oEmbed
diff --git a/url_previewers/acl.go b/url_previewing/acl.go
similarity index 99%
rename from url_previewers/acl.go
rename to url_previewing/acl.go
index 00754135..6bb3372b 100644
--- a/url_previewers/acl.go
+++ b/url_previewing/acl.go
@@ -1,4 +1,4 @@
-package url_previewers
+package url_previewing
 
 import (
 	"net"
diff --git a/url_previewers/calculated_previewer.go b/url_previewing/calculated_previewer.go
similarity index 66%
rename from url_previewers/calculated_previewer.go
rename to url_previewing/calculated_previewer.go
index 753ca943..0720c8c6 100644
--- a/url_previewers/calculated_previewer.go
+++ b/url_previewing/calculated_previewer.go
@@ -1,37 +1,31 @@
-package url_previewers
+package url_previewing
 
 import (
-	bytes2 "bytes"
-
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/ryanuber/go-glob"
 	"github.com/turt2live/matrix-media-repo/common"
 	"github.com/turt2live/matrix-media-repo/common/rcontext"
 	"github.com/turt2live/matrix-media-repo/metrics"
-	"github.com/turt2live/matrix-media-repo/util/stream_util"
 )
 
-func GenerateCalculatedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (PreviewResult, error) {
-	bytes, filename, contentType, contentLength, err := downloadRawContent(urlPayload, ctx.Config.UrlPreviews.FilePreviewTypes, languageHeader, ctx)
+func GenerateCalculatedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (Result, error) {
+	r, filename, contentType, err := downloadRawContent(urlPayload, ctx.Config.UrlPreviews.FilePreviewTypes, languageHeader, ctx)
 	if err != nil {
 		ctx.Log.Error("Error downloading content: ", err)
 
 		// Make sure the unsupported error gets passed through
 		if err == ErrPreviewUnsupported {
-			return PreviewResult{}, ErrPreviewUnsupported
+			return Result{}, ErrPreviewUnsupported
 		}
 
 		// We'll consider it not found for the sake of processing
-		return PreviewResult{}, common.ErrMediaNotFound
+		return Result{}, common.ErrMediaNotFound
 	}
 
-	stream := stream_util.BufferToStream(bytes2.NewBuffer(bytes))
-	img := &PreviewImage{
-		Data:                stream,
-		ContentType:         contentType,
-		Filename:            filename,
-		ContentLength:       int64(len(bytes)),
-		ContentLengthHeader: contentLength,
+	img := &Image{
+		Data:        r,
+		ContentType: contentType,
+		Filename:    filename,
 	}
 
 	description := ""
@@ -47,7 +41,7 @@ func GenerateCalculatedPreview(urlPayload *UrlPayload, languageHeader string, ct
 		description = ""
 	}
 
-	result := &PreviewResult{
+	result := &Result{
 		Type:        "", // intentionally empty
 		Url:         urlPayload.ParsedUrl.String(),
 		Title:       summarize(filename, ctx.Config.UrlPreviews.NumTitleWords, ctx.Config.UrlPreviews.MaxTitleLength),
diff --git a/url_previewers/http.go b/url_previewing/http.go
similarity index 82%
rename from url_previewers/http.go
rename to url_previewing/http.go
index a6763d1b..553bd524 100644
--- a/url_previewers/http.go
+++ b/url_previewing/http.go
@@ -1,4 +1,4 @@
-package url_previewers
+package url_previewing
 
 import (
 	"context"
@@ -15,7 +15,7 @@ import (
 	"github.com/turt2live/matrix-media-repo/common"
 	"github.com/turt2live/matrix-media-repo/common/rcontext"
 	"github.com/turt2live/matrix-media-repo/util"
-	"github.com/turt2live/matrix-media-repo/util/stream_util"
+	"github.com/turt2live/matrix-media-repo/util/readers"
 )
 
 func doHttpGet(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (*http.Response, error) {
@@ -120,38 +120,37 @@ func doHttpGet(urlPayload *UrlPayload, languageHeader string, ctx rcontext.Reque
 	return client.Do(req)
 }
 
-func downloadRawContent(urlPayload *UrlPayload, supportedTypes []string, languageHeader string, ctx rcontext.RequestContext) ([]byte, string, string, string, error) {
+func downloadRawContent(urlPayload *UrlPayload, supportedTypes []string, languageHeader string, ctx rcontext.RequestContext) (io.ReadCloser, string, string, error) {
 	ctx.Log.Info("Fetching remote content...")
 	resp, err := doHttpGet(urlPayload, languageHeader, ctx)
 	if err != nil {
-		return nil, "", "", "", err
+		return nil, "", "", err
 	}
 	if resp.StatusCode != http.StatusOK {
 		ctx.Log.Warn("Received status code " + strconv.Itoa(resp.StatusCode))
-		return nil, "", "", "", errors.New("error during transfer")
+		resp.Body.Close() // no reader is returned on this path, so release the body here
+		return nil, "", "", errors.New("error during transfer")
 	}
 
 	if ctx.Config.UrlPreviews.MaxPageSizeBytes > 0 && resp.ContentLength >= 0 && resp.ContentLength > ctx.Config.UrlPreviews.MaxPageSizeBytes {
-		return nil, "", "", "", common.ErrMediaTooLarge
+		resp.Body.Close()
+		return nil, "", "", common.ErrMediaTooLarge
 	}
 
-	var reader io.Reader
-	reader = resp.Body
+	// Default to the raw body so callers never receive a nil reader when no size limit is configured.
+	var reader io.ReadCloser = resp.Body
 	if ctx.Config.UrlPreviews.MaxPageSizeBytes > 0 {
-		reader = io.LimitReader(resp.Body, ctx.Config.UrlPreviews.MaxPageSizeBytes)
+		lr := io.LimitReader(resp.Body, ctx.Config.UrlPreviews.MaxPageSizeBytes)
+		reader = readers.NewCancelCloser(io.NopCloser(lr), func() {
+			resp.Body.Close()
+		})
 	}
 
-	bytes, err := io.ReadAll(reader)
-	if err != nil {
-		return nil, "", "", "", err
-	}
-
-	defer stream_util.DumpAndCloseStream(resp.Body)
-
 	contentType := resp.Header.Get("Content-Type")
 	for _, supportedType := range supportedTypes {
 		if !glob.Glob(supportedType, contentType) {
-			return nil, "", "", "", ErrPreviewUnsupported
+			resp.Body.Close()
+			return nil, "", "", ErrPreviewUnsupported
 		}
 	}
 
@@ -162,19 +157,28 @@ func downloadRawContent(urlPayload *UrlPayload, supportedTypes []string, languag
 		filename = params["filename"]
 	}
 
-	return bytes, filename, contentType, resp.Header.Get("Content-Length"), nil
+	return reader, filename, contentType, nil
 }
 
 func downloadHtmlContent(urlPayload *UrlPayload, supportedTypes []string, languageHeader string, ctx rcontext.RequestContext) (string, error) {
-	raw, _, contentType, _, err := downloadRawContent(urlPayload, supportedTypes, languageHeader, ctx)
+	r, _, contentType, err := downloadRawContent(urlPayload, supportedTypes, languageHeader, ctx)
+	if err != nil {
+		return "", err
+	}
 	html := ""
+	defer r.Close()
+	// A failed read must not be treated as a successful (possibly truncated) page.
+	raw, err := io.ReadAll(r)
+	if err != nil {
+		return "", err
+	}
 	if raw != nil {
 		html = util.ToUtf8(string(raw), contentType)
 	}
-	return html, err
+	return html, nil
 }
 
-func downloadImage(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (*PreviewImage, error) {
+func downloadImage(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (*Image, error) {
 	ctx.Log.Info("Getting image from " + urlPayload.ParsedUrl.String())
 	resp, err := doHttpGet(urlPayload, languageHeader, ctx)
 	if err != nil {
@@ -185,11 +185,9 @@ func downloadImage(urlPayload *UrlPayload, languageHeader string, ctx rcontext.R
 		return nil, errors.New("error during transfer")
 	}
 
-	image := &PreviewImage{
-		ContentType:         resp.Header.Get("Content-Type"),
-		Data:                resp.Body,
-		ContentLength:       resp.ContentLength,
-		ContentLengthHeader: resp.Header.Get("Content-Length"),
+	image := &Image{
+		ContentType: resp.Header.Get("Content-Type"),
+		Data:        resp.Body,
 	}
 
 	_, params, err := mime.ParseMediaType(resp.Header.Get("Content-Disposition"))
diff --git a/url_previewers/oembed_previewer.go b/url_previewing/oembed_previewer.go
similarity index 92%
rename from url_previewers/oembed_previewer.go
rename to url_previewing/oembed_previewer.go
index 8a86c2e2..423e2be5 100644
--- a/url_previewers/oembed_previewer.go
+++ b/url_previewing/oembed_previewer.go
@@ -1,4 +1,4 @@
-package url_previewers
+package url_previewing
 
 import (
 	"bytes"
@@ -41,10 +41,10 @@ func getOembed() *oembed.Oembed {
 	return oembedInstance
 }
 
-func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (PreviewResult, error) {
+func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (Result, error) {
 	item := getOembed().FindItem(urlPayload.ParsedUrl.String())
 	if item == nil {
-		return PreviewResult{}, ErrPreviewUnsupported
+		return Result{}, ErrPreviewUnsupported
 	}
 
 	info, err := item.FetchOembed(oembed.Options{
@@ -53,7 +53,7 @@ func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rc
 	})
 	if err != nil {
 		ctx.Log.Error("Error getting oEmbed: ", err)
-		return PreviewResult{}, err
+		return Result{}, err
 	}
 
 	if info.Type == "rich" {
@@ -62,7 +62,7 @@ func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rc
 		info.ThumbnailURL = info.URL
 	}
 
-	graph := &PreviewResult{
+	graph := &Result{
 		Type:        info.Type,
 		Url:         info.URL,
 		Title:       info.Title,
diff --git a/url_previewers/opengraph_previewer.go b/url_previewing/opengraph_previewer.go
similarity index 94%
rename from url_previewers/opengraph_previewer.go
rename to url_previewing/opengraph_previewer.go
index e4e62e60..288beb41 100644
--- a/url_previewers/opengraph_previewer.go
+++ b/url_previewing/opengraph_previewer.go
@@ -1,4 +1,4 @@
-package url_previewers
+package url_previewing
 
 import (
 	"net/url"
@@ -18,25 +18,25 @@ import (
 
 var ogSupportedTypes = []string{"text/*"}
 
-func GenerateOpenGraphPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (PreviewResult, error) {
+func GenerateOpenGraphPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (Result, error) {
 	html, err := downloadHtmlContent(urlPayload, ogSupportedTypes, languageHeader, ctx)
 	if err != nil {
 		ctx.Log.Error("Error downloading content: ", err)
 
 		// Make sure the unsupported error gets passed through
 		if err == ErrPreviewUnsupported {
-			return PreviewResult{}, ErrPreviewUnsupported
+			return Result{}, ErrPreviewUnsupported
 		}
 
 		// We'll consider it not found for the sake of processing
-		return PreviewResult{}, common.ErrMediaNotFound
+		return Result{}, common.ErrMediaNotFound
 	}
 
 	og := opengraph.NewOpenGraph()
 	err = og.ProcessHTML(strings.NewReader(html))
 	if err != nil {
 		ctx.Log.Error("Error getting OpenGraph: ", err)
-		return PreviewResult{}, err
+		return Result{}, err
 	}
 
 	if og.Title == "" {
@@ -53,7 +53,7 @@ func GenerateOpenGraphPreview(urlPayload *UrlPayload, languageHeader string, ctx
 	og.Title = summarize(og.Title, ctx.Config.UrlPreviews.NumTitleWords, ctx.Config.UrlPreviews.MaxTitleLength)
 	og.Description = summarize(og.Description, ctx.Config.UrlPreviews.NumWords, ctx.Config.UrlPreviews.MaxLength)
 
-	graph := &PreviewResult{
+	graph := &Result{
 		Type:        og.Type,
 		Url:         og.URL,
 		Title:       og.Title,
diff --git a/url_previewers/types.go b/url_previewing/types.go
similarity index 53%
rename from url_previewers/types.go
rename to url_previewing/types.go
index 2d245c99..dd330fac 100644
--- a/url_previewers/types.go
+++ b/url_previewing/types.go
@@ -1,4 +1,4 @@
-package url_previewers
+package url_previewing
 
 import (
 	"errors"
@@ -6,21 +6,19 @@ import (
 	"net/url"
 )
 
-type PreviewResult struct {
+type Result struct {
 	Url         string
 	SiteName    string
 	Type        string
 	Description string
 	Title       string
-	Image       *PreviewImage
+	Image       *Image
 }
 
-type PreviewImage struct {
-	ContentType         string
-	Data                io.ReadCloser
-	Filename            string
-	ContentLength       int64
-	ContentLengthHeader string
+type Image struct {
+	ContentType string
+	Data        io.ReadCloser
+	Filename    string
 }
 
 type UrlPayload struct {
diff --git a/url_previewers/util.go b/url_previewing/util.go
similarity index 97%
rename from url_previewers/util.go
rename to url_previewing/util.go
index 844ed551..bcc3b11c 100644
--- a/url_previewers/util.go
+++ b/url_previewing/util.go
@@ -1,4 +1,4 @@
-package url_previewers
+package url_previewing
 
 import (
 	"regexp"
-- 
GitLab