diff --git a/controllers/preview_controller/preview_controller.go b/controllers/preview_controller/preview_controller.go index 1a58ee83c45aeeb4aa99ec90fa9495af0b3198e9..a4e6b6a4f28caa6836814dd7dbd8c38f8079b183 100644 --- a/controllers/preview_controller/preview_controller.go +++ b/controllers/preview_controller/preview_controller.go @@ -13,7 +13,7 @@ import ( "github.com/turt2live/matrix-media-repo/storage" "github.com/turt2live/matrix-media-repo/storage/stores" "github.com/turt2live/matrix-media-repo/types" - "github.com/turt2live/matrix-media-repo/url_previewers" + "github.com/turt2live/matrix-media-repo/url_previewing" "github.com/turt2live/matrix-media-repo/util" ) @@ -55,7 +55,7 @@ func GetPreview(urlStr string, onHost string, forUserId string, atTs int64, lang return nil, common.ErrInvalidHost } parsedUrl.Fragment = "" // Remove fragment because it's not important for servers - urlToPreview := &url_previewers.UrlPayload{ + urlToPreview := &url_previewing.UrlPayload{ UrlString: urlStr, ParsedUrl: parsedUrl, } diff --git a/controllers/preview_controller/preview_resource_handler.go b/controllers/preview_controller/preview_resource_handler.go index b45dfd03eeaadafb491a7d70c94953872fbc9ff6..0c7c5ac0467dd9b539e9a0bf893b9ee2fc30329e 100644 --- a/controllers/preview_controller/preview_resource_handler.go +++ b/controllers/preview_controller/preview_resource_handler.go @@ -5,7 +5,7 @@ import ( "sync" "github.com/getsentry/sentry-go" - url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewers" + url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewing" "github.com/turt2live/matrix-media-repo/util/stream_util" "github.com/disintegration/imaging" @@ -80,7 +80,7 @@ func urlPreviewWorkFn(request *resource_handler.WorkRequest) (resp *urlPreviewRe db := storage.GetDatabase().GetUrlStore(ctx) - var preview url_previewers2.PreviewResult + var preview url_previewers2.Result err := url_previewers2.ErrPreviewUnsupported // Try oEmbed first diff --git a/pipelines/_steps/url_preview/upload_image.go b/pipelines/_steps/url_preview/upload_image.go index 26dd92e5713069a374e9d8607a02804eff672df4..791d3812826b21c814627b1326d7d9e1e298c3bb 100644 --- a/pipelines/_steps/url_preview/upload_image.go +++ b/pipelines/_steps/url_preview/upload_image.go @@ -9,15 +9,16 @@ import ( "github.com/turt2live/matrix-media-repo/datastores" "github.com/turt2live/matrix-media-repo/pipelines/pipeline_upload" "github.com/turt2live/matrix-media-repo/thumbnailing" - "github.com/turt2live/matrix-media-repo/url_previewers" + "github.com/turt2live/matrix-media-repo/url_previewing" "github.com/turt2live/matrix-media-repo/util" ) -func UploadImage(ctx rcontext.RequestContext, image *url_previewers.PreviewImage, onHost string, userId string, forRecord *database.DbUrlPreview) { - if image == nil { +func UploadImage(ctx rcontext.RequestContext, image *url_previewing.Image, onHost string, userId string, forRecord *database.DbUrlPreview) { + if image == nil || image.Data == nil { return } + defer image.Data.Close() pr, pw := io.Pipe() tee := io.TeeReader(image.Data, pw) mediaChan := make(chan *database.DbMedia) diff --git a/pipelines/pipeline_preview/pipeline.go b/pipelines/pipeline_preview/pipeline.go index 9f1156a9ab394d207d0e892872b05857be3b6fb2..bd5312ee87861f18ed49f8ccd6f0db2f5afa1f6e 100644 --- a/pipelines/pipeline_preview/pipeline.go +++ b/pipelines/pipeline_preview/pipeline.go @@ -10,7 +10,7 @@ import ( "github.com/turt2live/matrix-media-repo/common/rcontext" "github.com/turt2live/matrix-media-repo/database" "github.com/turt2live/matrix-media-repo/pipelines/_steps/url_preview" - url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewers" + url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewing" "github.com/turt2live/matrix-media-repo/util" "golang.org/x/sync/singleflight" ) @@ -56,7 +56,7 @@ func Execute(ctx rcontext.RequestContext, onHost string, previewUrl string, user UrlString: previewUrl, ParsedUrl: parsedUrl, } - var preview url_previewers2.PreviewResult + var preview url_previewers2.Result err = url_previewers2.ErrPreviewUnsupported // Step 5: Try oEmbed diff --git a/url_previewers/acl.go b/url_previewing/acl.go similarity index 99% rename from url_previewers/acl.go rename to url_previewing/acl.go index 00754135a03ce4bc281d2a3ab5376d7ed72df693..6bb3372b58eed3f02818b3090b023450b9215bbf 100644 --- a/url_previewers/acl.go +++ b/url_previewing/acl.go @@ -1,4 +1,4 @@ -package url_previewers +package url_previewing import ( "net" diff --git a/url_previewers/calculated_previewer.go b/url_previewing/calculated_previewer.go similarity index 66% rename from url_previewers/calculated_previewer.go rename to url_previewing/calculated_previewer.go index 753ca943d329958a3b2a9c3cca2cccbaa00d9441..0720c8c657539dee57a5a90e57e2a46335cde2bb 100644 --- a/url_previewers/calculated_previewer.go +++ b/url_previewing/calculated_previewer.go @@ -1,37 +1,31 @@ -package url_previewers +package url_previewing import ( - bytes2 "bytes" - "github.com/prometheus/client_golang/prometheus" "github.com/ryanuber/go-glob" "github.com/turt2live/matrix-media-repo/common" "github.com/turt2live/matrix-media-repo/common/rcontext" "github.com/turt2live/matrix-media-repo/metrics" - "github.com/turt2live/matrix-media-repo/util/stream_util" ) -func GenerateCalculatedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (PreviewResult, error) { - bytes, filename, contentType, contentLength, err := downloadRawContent(urlPayload, ctx.Config.UrlPreviews.FilePreviewTypes, languageHeader, ctx) +func GenerateCalculatedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (Result, error) { + r, filename, contentType, err := downloadRawContent(urlPayload, ctx.Config.UrlPreviews.FilePreviewTypes, languageHeader, ctx) if err != nil { ctx.Log.Error("Error downloading content: ", err) // Make sure the unsupported error gets passed through if err == ErrPreviewUnsupported { - return PreviewResult{}, ErrPreviewUnsupported + return Result{}, ErrPreviewUnsupported } // We'll consider it not found for the sake of processing - return PreviewResult{}, common.ErrMediaNotFound + return Result{}, common.ErrMediaNotFound } - stream := stream_util.BufferToStream(bytes2.NewBuffer(bytes)) - img := &PreviewImage{ - Data: stream, - ContentType: contentType, - Filename: filename, - ContentLength: int64(len(bytes)), - ContentLengthHeader: contentLength, + img := &Image{ + Data: r, + ContentType: contentType, + Filename: filename, } description := "" @@ -47,7 +41,7 @@ func GenerateCalculatedPreview(urlPayload *UrlPayload, languageHeader string, ct description = "" } - result := &PreviewResult{ + result := &Result{ Type: "", // intentionally empty Url: urlPayload.ParsedUrl.String(), Title: summarize(filename, ctx.Config.UrlPreviews.NumTitleWords, ctx.Config.UrlPreviews.MaxTitleLength), diff --git a/url_previewers/http.go b/url_previewing/http.go similarity index 82% rename from url_previewers/http.go rename to url_previewing/http.go index a6763d1b4d76d5d6154d518b9951ab874efd3d83..553bd524044e62d2c497291a820702641e1292b6 100644 --- a/url_previewers/http.go +++ b/url_previewing/http.go @@ -1,4 +1,4 @@ -package url_previewers +package url_previewing import ( "context" @@ -15,7 +15,7 @@ import ( "github.com/turt2live/matrix-media-repo/common" "github.com/turt2live/matrix-media-repo/common/rcontext" "github.com/turt2live/matrix-media-repo/util" - "github.com/turt2live/matrix-media-repo/util/stream_util" + "github.com/turt2live/matrix-media-repo/util/readers" ) func doHttpGet(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (*http.Response, error) { @@ -120,38 +120,33 @@ func doHttpGet(urlPayload *UrlPayload, languageHeader string, ctx rcontext.Reque return client.Do(req) } -func downloadRawContent(urlPayload *UrlPayload, supportedTypes []string, languageHeader string, ctx rcontext.RequestContext) ([]byte, string, string, string, error) { +func downloadRawContent(urlPayload *UrlPayload, supportedTypes []string, languageHeader string, ctx rcontext.RequestContext) (io.ReadCloser, string, string, error) { ctx.Log.Info("Fetching remote content...") resp, err := doHttpGet(urlPayload, languageHeader, ctx) if err != nil { - return nil, "", "", "", err + return nil, "", "", err } if resp.StatusCode != http.StatusOK { ctx.Log.Warn("Received status code " + strconv.Itoa(resp.StatusCode)) - return nil, "", "", "", errors.New("error during transfer") + return nil, "", "", errors.New("error during transfer") } if ctx.Config.UrlPreviews.MaxPageSizeBytes > 0 && resp.ContentLength >= 0 && resp.ContentLength > ctx.Config.UrlPreviews.MaxPageSizeBytes { - return nil, "", "", "", common.ErrMediaTooLarge + return nil, "", "", common.ErrMediaTooLarge } - var reader io.Reader - reader = resp.Body + var reader io.ReadCloser if ctx.Config.UrlPreviews.MaxPageSizeBytes > 0 { - reader = io.LimitReader(resp.Body, ctx.Config.UrlPreviews.MaxPageSizeBytes) + lr := io.LimitReader(resp.Body, ctx.Config.UrlPreviews.MaxPageSizeBytes) + reader = readers.NewCancelCloser(io.NopCloser(lr), func() { + resp.Body.Close() + }) } - bytes, err := io.ReadAll(reader) - if err != nil { - return nil, "", "", "", err - } - - defer stream_util.DumpAndCloseStream(resp.Body) - contentType := resp.Header.Get("Content-Type") for _, supportedType := range supportedTypes { if !glob.Glob(supportedType, contentType) { - return nil, "", "", "", ErrPreviewUnsupported + return nil, "", "", ErrPreviewUnsupported } } @@ -162,19 +157,24 @@ func downloadRawContent(urlPayload *UrlPayload, supportedTypes []string, languag filename = params["filename"] } - return bytes, filename, contentType, resp.Header.Get("Content-Length"), nil + return reader, filename, contentType, nil } func downloadHtmlContent(urlPayload *UrlPayload, supportedTypes []string, languageHeader string, ctx rcontext.RequestContext) (string, error) { - raw, _, contentType, _, err := downloadRawContent(urlPayload, supportedTypes, languageHeader, ctx) + r, _, contentType, err := downloadRawContent(urlPayload, supportedTypes, languageHeader, ctx) + if err != nil { + return "", err + } html := "" + defer r.Close() + raw, _ := io.ReadAll(r) if raw != nil { html = util.ToUtf8(string(raw), contentType) } - return html, err + return html, nil } -func downloadImage(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (*PreviewImage, error) { +func downloadImage(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (*Image, error) { ctx.Log.Info("Getting image from " + urlPayload.ParsedUrl.String()) resp, err := doHttpGet(urlPayload, languageHeader, ctx) if err != nil { @@ -185,11 +185,10 @@ func downloadImage(urlPayload *UrlPayload, languageHeader string, ctx rcontext.R return nil, errors.New("error during transfer") } - image := &PreviewImage{ - ContentType: resp.Header.Get("Content-Type"), - Data: resp.Body, - ContentLength: resp.ContentLength, - ContentLengthHeader: resp.Header.Get("Content-Length"), + image := &Image{ + ContentType: resp.Header.Get("Content-Type"), + Data: resp.Body, + ContentLength: resp.ContentLength, } _, params, err := mime.ParseMediaType(resp.Header.Get("Content-Disposition")) diff --git a/url_previewers/oembed_previewer.go b/url_previewing/oembed_previewer.go similarity index 92% rename from url_previewers/oembed_previewer.go rename to url_previewing/oembed_previewer.go index 8a86c2e29e55fc0b2aa1770f29cebf8b397be0d1..423e2be5d22597b532834b5fd96b5c9792611cbe 100644 --- a/url_previewers/oembed_previewer.go +++ b/url_previewing/oembed_previewer.go @@ -1,4 +1,4 @@ -package url_previewers +package url_previewing import ( "bytes" @@ -41,10 +41,10 @@ func getOembed() *oembed.Oembed { return oembedInstance } -func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (PreviewResult, error) { +func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (Result, error) { item := getOembed().FindItem(urlPayload.ParsedUrl.String()) if item == nil { - return PreviewResult{}, ErrPreviewUnsupported + return Result{}, ErrPreviewUnsupported } info, err := item.FetchOembed(oembed.Options{ @@ -53,7 +53,7 @@ func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rc }) if err != nil { ctx.Log.Error("Error getting oEmbed: ", err) - return PreviewResult{}, err + return Result{}, err } if info.Type == "rich" { @@ -62,7 +62,7 @@ func GenerateOEmbedPreview(urlPayload *UrlPayload, languageHeader string, ctx rc info.ThumbnailURL = info.URL } - graph := &PreviewResult{ + graph := &Result{ Type: info.Type, Url: info.URL, Title: info.Title, diff --git a/url_previewers/opengraph_previewer.go b/url_previewing/opengraph_previewer.go similarity index 94% rename from url_previewers/opengraph_previewer.go rename to url_previewing/opengraph_previewer.go index e4e62e6029c161c1618307bb43a8cdfec0dfcbed..288beb41ad5cfa8b4b2090975e60a4c746602d0f 100644 --- a/url_previewers/opengraph_previewer.go +++ b/url_previewing/opengraph_previewer.go @@ -1,4 +1,4 @@ -package url_previewers +package url_previewing import ( "net/url" @@ -18,25 +18,25 @@ import ( var ogSupportedTypes = []string{"text/*"} -func GenerateOpenGraphPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (PreviewResult, error) { +func GenerateOpenGraphPreview(urlPayload *UrlPayload, languageHeader string, ctx rcontext.RequestContext) (Result, error) { html, err := downloadHtmlContent(urlPayload, ogSupportedTypes, languageHeader, ctx) if err != nil { ctx.Log.Error("Error downloading content: ", err) // Make sure the unsupported error gets passed through if err == ErrPreviewUnsupported { - return PreviewResult{}, ErrPreviewUnsupported + return Result{}, ErrPreviewUnsupported } // We'll consider it not found for the sake of processing - return PreviewResult{}, common.ErrMediaNotFound + return Result{}, common.ErrMediaNotFound } og := opengraph.NewOpenGraph() err = og.ProcessHTML(strings.NewReader(html)) if err != nil { ctx.Log.Error("Error getting OpenGraph: ", err) - return PreviewResult{}, err + return Result{}, err } if og.Title == "" { @@ -53,7 +53,7 @@ func GenerateOpenGraphPreview(urlPayload *UrlPayload, languageHeader string, ctx og.Title = summarize(og.Title, ctx.Config.UrlPreviews.NumTitleWords, ctx.Config.UrlPreviews.MaxTitleLength) og.Description = summarize(og.Description, ctx.Config.UrlPreviews.NumWords, ctx.Config.UrlPreviews.MaxLength) - graph := &PreviewResult{ + graph := &Result{ Type: og.Type, Url: og.URL, Title: og.Title, diff --git a/url_previewers/types.go b/url_previewing/types.go similarity index 53% rename from url_previewers/types.go rename to url_previewing/types.go index 2d245c99d241fe73a251b078e410ab7989771c38..dd330fac07bd5cf8e77f572e60a4d9c2b2cd6ee3 100644 --- a/url_previewers/types.go +++ b/url_previewing/types.go @@ -1,4 +1,4 @@ -package url_previewers +package url_previewing import ( "errors" @@ -6,21 +6,19 @@ import ( "net/url" ) -type PreviewResult struct { +type Result struct { Url string SiteName string Type string Description string Title string - Image *PreviewImage + Image *Image } -type PreviewImage struct { - ContentType string - Data io.ReadCloser - Filename string - ContentLength int64 - ContentLengthHeader string +type Image struct { + ContentType string + Data io.ReadCloser + Filename string } type UrlPayload struct { diff --git a/url_previewers/util.go b/url_previewing/util.go similarity index 97% rename from url_previewers/util.go rename to url_previewing/util.go index 844ed551a86926cfea40eba1a9cf4bb9cff8fa4b..bcc3b11ca209987828b0945c2e820e25a8fca321 100644 --- a/url_previewers/util.go +++ b/url_previewing/util.go @@ -1,4 +1,4 @@ -package url_previewers +package url_previewing import ( "regexp"