From 1fd3cb314774fc653742f7eaa34b4225de37680c Mon Sep 17 00:00:00 2001
From: Travis Ralston <travpc@gmail.com>
Date: Tue, 4 Jul 2023 23:27:52 -0600
Subject: [PATCH] Remove now-defunct preview controller to clear errors

---
 .../preview_controller/preview_controller.go  |  94 ---------
 .../preview_resource_handler.go               | 196 ------------------
 2 files changed, 290 deletions(-)
 delete mode 100644 controllers/preview_controller/preview_controller.go
 delete mode 100644 controllers/preview_controller/preview_resource_handler.go

diff --git a/controllers/preview_controller/preview_controller.go b/controllers/preview_controller/preview_controller.go
deleted file mode 100644
index a6936453..00000000
--- a/controllers/preview_controller/preview_controller.go
+++ /dev/null
@@ -1,94 +0,0 @@
-package preview_controller
-
-import (
-	"database/sql"
-	"errors"
-	"fmt"
-	"net/url"
-
-	"github.com/sirupsen/logrus"
-	"github.com/turt2live/matrix-media-repo/common"
-	"github.com/turt2live/matrix-media-repo/common/globals"
-	"github.com/turt2live/matrix-media-repo/common/rcontext"
-	"github.com/turt2live/matrix-media-repo/storage"
-	"github.com/turt2live/matrix-media-repo/storage/stores"
-	"github.com/turt2live/matrix-media-repo/types"
-	"github.com/turt2live/matrix-media-repo/url_previewing/m"
-	"github.com/turt2live/matrix-media-repo/util"
-)
-
-func GetPreview(urlStr string, onHost string, forUserId string, atTs int64, languageHeader string, ctx rcontext.RequestContext) (*types.UrlPreview, error) {
-	atTs = stores.GetBucketTs(atTs)
-	cacheKey := fmt.Sprintf("%d_%s/%s", atTs, onHost, urlStr)
-	v, _, err := globals.DefaultRequestGroup.DoWithoutPost(cacheKey, func() (interface{}, error) {
-
-		ctx := ctx.LogWithFields(logrus.Fields{
-			"preview_controller_at_ts": atTs,
-		})
-
-		db := storage.GetDatabase().GetUrlStore(ctx)
-
-		cached, err := db.GetPreview(urlStr, atTs, languageHeader)
-		if err != nil && err != sql.ErrNoRows {
-			ctx.Log.Error("Error getting cached URL preview: ", err.Error())
-			return nil, err
-		}
-		if err != sql.ErrNoRows {
-			ctx.Log.Info("Returning cached URL preview")
-			return cachedPreviewToReal(cached)
-		}
-
-		now := util.NowMillis()
-		atTsBk := stores.GetBucketTs(atTs)
-		nowBk := stores.GetBucketTs(now)
-		if (now-atTs) > 60000 && atTsBk != nowBk {
-			// Because we don't have a cached preview, we'll use the current time as the preview time.
-			// We also give a 60 second buffer so we don't cause an infinite loop (considering we're
-			// calling ourselves), and to give a lenient opportunity for slow execution.
-			return GetPreview(urlStr, onHost, forUserId, now, languageHeader, ctx)
-		}
-
-		parsedUrl, err := url.Parse(urlStr)
-		if err != nil {
-			ctx.Log.Error("Error parsing URL: ", err.Error())
-			db.InsertPreviewError(urlStr, common.ErrCodeInvalidHost)
-			return nil, common.ErrInvalidHost
-		}
-		parsedUrl.Fragment = "" // Remove fragment because it's not important for servers
-		urlToPreview := &m.UrlPayload{
-			UrlString: urlStr,
-			ParsedUrl: parsedUrl,
-		}
-
-		ctx.Log.Info("Preview not cached - fetching resource")
-
-		previewChan := getResourceHandler().GeneratePreview(urlToPreview, forUserId, onHost, languageHeader, ctx.Config.UrlPreviews.OEmbed)
-		defer close(previewChan)
-
-		result := <-previewChan
-		return result.preview, result.err
-	})
-
-	var value *types.UrlPreview
-	if v != nil {
-		value = v.(*types.UrlPreview)
-	}
-
-	return value, err
-}
-
-func cachedPreviewToReal(cached *types.CachedUrlPreview) (*types.UrlPreview, error) {
-	if cached.ErrorCode == common.ErrCodeInvalidHost {
-		return nil, common.ErrInvalidHost
-	} else if cached.ErrorCode == common.ErrCodeHostNotFound {
-		return nil, common.ErrHostNotFound
-	} else if cached.ErrorCode == common.ErrCodeHostNotAllowed {
-		return nil, common.ErrHostNotAllowed
-	} else if cached.ErrorCode == common.ErrCodeNotFound {
-		return nil, common.ErrMediaNotFound
-	} else if cached.ErrorCode == common.ErrCodeUnknown {
-		return nil, errors.New("unknown error")
-	}
-
-	return cached.Preview, nil
-}
diff --git a/controllers/preview_controller/preview_resource_handler.go b/controllers/preview_controller/preview_resource_handler.go
deleted file mode 100644
index 789d7c36..00000000
--- a/controllers/preview_controller/preview_resource_handler.go
+++ /dev/null
@@ -1,196 +0,0 @@
-package preview_controller
-
-import (
-	"fmt"
-	"sync"
-
-	"github.com/getsentry/sentry-go"
-	url_previewers2 "github.com/turt2live/matrix-media-repo/url_previewing/m"
-	"github.com/turt2live/matrix-media-repo/url_previewing/p"
-	"github.com/turt2live/matrix-media-repo/util/stream_util"
-
-	"github.com/disintegration/imaging"
-	"github.com/sirupsen/logrus"
-	"github.com/turt2live/matrix-media-repo/common"
-	"github.com/turt2live/matrix-media-repo/common/config"
-	"github.com/turt2live/matrix-media-repo/common/rcontext"
-	"github.com/turt2live/matrix-media-repo/controllers/upload_controller"
-	"github.com/turt2live/matrix-media-repo/storage"
-	"github.com/turt2live/matrix-media-repo/storage/datastore"
-	"github.com/turt2live/matrix-media-repo/types"
-	"github.com/turt2live/matrix-media-repo/util"
-	"github.com/turt2live/matrix-media-repo/util/resource_handler"
-)
-
-type urlResourceHandler struct {
-	resourceHandler *resource_handler.ResourceHandler
-}
-
-type urlPreviewRequest struct {
-	urlPayload     *url_previewers2.UrlPayload
-	forUserId      string
-	onHost         string
-	languageHeader string
-	allowOEmbed    bool
-}
-
-type urlPreviewResponse struct {
-	preview *types.UrlPreview
-	err     error
-}
-
-var resHandlerInstance *urlResourceHandler
-var resHandlerSingletonLock = &sync.Once{}
-
-func getResourceHandler() *urlResourceHandler {
-	if resHandlerInstance == nil {
-		resHandlerSingletonLock.Do(func() {
-			handler, err := resource_handler.New(config.Get().UrlPreviews.NumWorkers, func(r *resource_handler.WorkRequest) interface{} {
-				return urlPreviewWorkFn(r)
-			})
-			if err != nil {
-				sentry.CaptureException(err)
-				panic(err)
-			}
-
-			resHandlerInstance = &urlResourceHandler{handler}
-		})
-	}
-
-	return resHandlerInstance
-}
-
-func urlPreviewWorkFn(request *resource_handler.WorkRequest) (resp *urlPreviewResponse) {
-	info := request.Metadata.(*urlPreviewRequest)
-	ctx := rcontext.Initial().LogWithFields(logrus.Fields{
-		"worker_requestId": request.Id,
-		"worker_url":       info.urlPayload.UrlString,
-	})
-
-	resp = &urlPreviewResponse{}
-	defer func() {
-		if err := recover(); err != nil {
-			ctx.Log.Error("Caught panic: ", err)
-			sentry.CurrentHub().Recover(err)
-			resp.preview = nil
-			resp.err = util.PanicToError(err)
-		}
-	}()
-
-	ctx.Log.Info("Processing url preview request")
-
-	db := storage.GetDatabase().GetUrlStore(ctx)
-
-	var preview url_previewers2.PreviewResult
-	err := url_previewers2.ErrPreviewUnsupported
-
-	// Try oEmbed first
-	if info.allowOEmbed {
-		ctx = ctx.LogWithFields(logrus.Fields{"worker_previewer": "oEmbed"})
-		ctx.Log.Info("Trying oEmbed previewer")
-		preview, err = p.GenerateOEmbedPreview(info.urlPayload, info.languageHeader, ctx)
-	}
-
-	// Then try OpenGraph
-	if err == url_previewers2.ErrPreviewUnsupported {
-		ctx = ctx.LogWithFields(logrus.Fields{"worker_previewer": "OpenGraph"})
-		ctx.Log.Info("oEmbed preview for this URL is unsupported or disabled - treating it as a OpenGraph")
-		preview, err = p.GenerateOpenGraphPreview(info.urlPayload, info.languageHeader, ctx)
-	}
-
-	// Finally try scraping
-	if err == url_previewers2.ErrPreviewUnsupported {
-		ctx = ctx.LogWithFields(logrus.Fields{"worker_previewer": "File"})
-		ctx.Log.Info("OpenGraph preview for this URL is unsupported - treating it as a file")
-		preview, err = p.GenerateCalculatedPreview(info.urlPayload, info.languageHeader, ctx)
-	}
-
-	if err != nil {
-		// Transparently convert "unsupported" to "not found" for processing
-		if err == url_previewers2.ErrPreviewUnsupported {
-			err = common.ErrMediaNotFound
-		}
-
-		if err == common.ErrMediaNotFound {
-			db.InsertPreviewError(info.urlPayload.UrlString, common.ErrCodeNotFound)
-		} else {
-			db.InsertPreviewError(info.urlPayload.UrlString, common.ErrCodeUnknown)
-		}
-		resp.err = err
-		return resp
-	}
-
-	result := &types.UrlPreview{
-		Url:            preview.Url,
-		SiteName:       preview.SiteName,
-		Type:           preview.Type,
-		Description:    preview.Description,
-		Title:          preview.Title,
-		LanguageHeader: info.languageHeader,
-	}
-
-	// Store the thumbnail, if there is one
-	if preview.Image != nil && !upload_controller.IsRequestTooLarge(preview.Image.ContentLength, preview.Image.ContentLengthHeader, ctx) {
-		contentLength := upload_controller.EstimateContentLength(preview.Image.ContentLength, preview.Image.ContentLengthHeader)
-
-		// UploadMedia will close the read stream for the thumbnail and dedupe the image
-		media, err := upload_controller.UploadMedia(preview.Image.Data, contentLength, preview.Image.ContentType, preview.Image.Filename, info.forUserId, info.onHost, ctx)
-		if err != nil {
-			ctx.Log.Warn("Non-fatal error storing preview thumbnail: ", err)
-			sentry.CaptureException(err)
-		} else {
-			mediaStream, err := datastore.DownloadStream(ctx, media.DatastoreId, media.Location)
-			if err != nil {
-				ctx.Log.Warn("Non-fatal error streaming datastore file: ", err)
-				sentry.CaptureException(err)
-			} else {
-				defer stream_util.DumpAndCloseStream(mediaStream)
-				img, err := imaging.Decode(mediaStream)
-				if err != nil {
-					ctx.Log.Warn("Non-fatal error getting thumbnail dimensions: ", err)
-					sentry.CaptureException(err)
-				} else {
-					result.ImageMxc = media.MxcUri()
-					result.ImageType = media.ContentType
-					result.ImageSize = media.SizeBytes
-					result.ImageWidth = img.Bounds().Max.X
-					result.ImageHeight = img.Bounds().Max.Y
-				}
-			}
-		}
-	}
-
-	dbRecord := &types.CachedUrlPreview{
-		Preview:   result,
-		SearchUrl: info.urlPayload.UrlString,
-		ErrorCode: "",
-		FetchedTs: util.NowMillis(),
-	}
-	err = db.InsertPreview(dbRecord)
-	if err != nil {
-		ctx.Log.Warn("Error caching URL preview: ", err)
-		sentry.CaptureException(err)
-		// Non-fatal: Just report it and move on. The worst that happens is we re-cache it.
-	}
-
-	resp.preview = result
-	return resp
-}
-
-func (h *urlResourceHandler) GeneratePreview(urlPayload *url_previewers2.UrlPayload, forUserId string, onHost string, languageHeader string, allowOEmbed bool) chan *urlPreviewResponse {
-	resultChan := make(chan *urlPreviewResponse)
-	go func() {
-		reqId := fmt.Sprintf("preview_%s", urlPayload.UrlString) // don't put the user id or host in the ID string
-		c := h.resourceHandler.GetResource(reqId, &urlPreviewRequest{
-			urlPayload:     urlPayload,
-			forUserId:      forUserId,
-			onHost:         onHost,
-			languageHeader: languageHeader,
-			allowOEmbed:    allowOEmbed,
-		})
-		defer close(c)
-		result := <-c
-		resultChan <- result.(*urlPreviewResponse)
-	}()
-	return resultChan
-}
-- 
GitLab