diff --git a/pipelines/_steps/url_preview/preview.go b/pipelines/_steps/url_preview/preview.go
new file mode 100644
index 0000000000000000000000000000000000000000..b06396464a7ad9fb77771bdada247d004efbe651
--- /dev/null
+++ b/pipelines/_steps/url_preview/preview.go
@@ -0,0 +1,40 @@
+package url_preview
+
+import (
+	"errors"
+
+	"github.com/turt2live/matrix-media-repo/common/rcontext"
+	"github.com/turt2live/matrix-media-repo/url_previewing/m"
+	"github.com/turt2live/matrix-media-repo/url_previewing/p"
+)
+
+// Preview generates a preview for targetUrl by trying each previewer in order:
+// oEmbed (only when enabled in the config), then OpenGraph, then the built-in
+// calculated previewer. A previewer is only tried when everything before it
+// reported m.ErrPreviewUnsupported; the first other outcome, success or
+// failure, is returned as-is.
+func Preview(ctx rcontext.RequestContext, targetUrl *m.UrlPayload, languageHeader string) (m.PreviewResult, error) {
+	var preview m.PreviewResult
+	// Start out "unsupported" so OpenGraph still runs when oEmbed is disabled.
+	err := m.ErrPreviewUnsupported
+
+	// Try oEmbed first
+	if ctx.Config.UrlPreviews.OEmbed {
+		ctx.Log.Debug("Trying oEmbed previewer")
+		preview, err = p.GenerateOEmbedPreview(targetUrl, languageHeader, ctx)
+	}
+
+	// Try OpenGraph if that failed
+	if errors.Is(err, m.ErrPreviewUnsupported) {
+		ctx.Log.Debug("Trying OpenGraph previewer")
+		preview, err = p.GenerateOpenGraphPreview(targetUrl, languageHeader, ctx)
+	}
+
+	// Try scraping if that failed
+	if errors.Is(err, m.ErrPreviewUnsupported) {
+		ctx.Log.Debug("Trying built-in previewer")
+		preview, err = p.GenerateCalculatedPreview(targetUrl, languageHeader, ctx)
+	}
+
+	return preview, err
+}
diff --git a/pipelines/_steps/url_preview/process.go b/pipelines/_steps/url_preview/process.go
new file mode 100644
index 0000000000000000000000000000000000000000..90044f3f28e0814ed8c8902e01f7d53bf7b5b3af
--- /dev/null
+++ b/pipelines/_steps/url_preview/process.go
@@ -0,0 +1,62 @@
+package url_preview
+
+import (
+	"errors"
+
+	"github.com/getsentry/sentry-go"
+	"github.com/turt2live/matrix-media-repo/common"
+	"github.com/turt2live/matrix-media-repo/common/rcontext"
+	"github.com/turt2live/matrix-media-repo/database"
+	"github.com/turt2live/matrix-media-repo/url_previewing/m"
+	"github.com/turt2live/matrix-media-repo/util"
+)
+
+// Process turns the outcome of a preview attempt (preview and err) into a
+// cached database record for previewUrl.
+//
+// When err is non-nil an error record is stored for previewUrl and the error
+// is returned, with m.ErrPreviewUnsupported reported to the caller as
+// common.ErrMediaNotFound. Otherwise a record is built from preview, its image
+// is passed to UploadImage, and the record is stored. A failure to store the
+// successful record is logged and sent to Sentry but does not fail the call.
+func Process(ctx rcontext.RequestContext, previewUrl string, preview m.PreviewResult, err error, onHost string, userId string, languageHeader string, ts int64) (*database.DbUrlPreview, error) {
+	previewDb := database.GetInstance().UrlPreviews.Prepare(ctx)
+
+	if err != nil {
+		// An unsupported URL is reported to callers as missing media.
+		if errors.Is(err, m.ErrPreviewUnsupported) {
+			err = common.ErrMediaNotFound
+		}
+
+		// Record the failure for this URL; InsertError's result is not checked.
+		if errors.Is(err, common.ErrMediaNotFound) {
+			previewDb.InsertError(previewUrl, common.ErrCodeNotFound)
+		} else {
+			previewDb.InsertError(previewUrl, common.ErrCodeUnknown)
+		}
+		return nil, err
+	}
+
+	result := &database.DbUrlPreview{
+		Url:            previewUrl,
+		ErrorCode:      "",
+		BucketTs:       util.GetHourBucket(ts),
+		SiteUrl:        preview.Url,
+		SiteName:       preview.SiteName,
+		ResourceType:   preview.Type,
+		Description:    preview.Description,
+		Title:          preview.Title,
+		LanguageHeader: languageHeader,
+	}
+
+	// Step 7: Store the thumbnail, if needed
+	UploadImage(ctx, preview.Image, onHost, userId, result)
+
+	// Step 8: Insert the record
+	if err = previewDb.Insert(result); err != nil {
+		ctx.Log.Warn("Non-fatal error caching URL preview: ", err)
+		sentry.CaptureException(err)
+	}
+
+	return result, nil
+}
diff --git a/pipelines/pipeline_preview/pipeline.go b/pipelines/pipeline_preview/pipeline.go
index 9f8978d16ebdcb54e99981a7719e559b54aebd91..08f6ad103e4a1af6560e187e982266ccfe315790 100644
--- a/pipelines/pipeline_preview/pipeline.go
+++ b/pipelines/pipeline_preview/pipeline.go
@@ -5,13 +5,11 @@ import (
 	"fmt"
 	"net/url"
 
-	"github.com/getsentry/sentry-go"
 	"github.com/turt2live/matrix-media-repo/common"
 	"github.com/turt2live/matrix-media-repo/common/rcontext"
 	"github.com/turt2live/matrix-media-repo/database"
 	"github.com/turt2live/matrix-media-repo/pipelines/_steps/url_preview"
 	"github.com/turt2live/matrix-media-repo/url_previewing/m"
-	"github.com/turt2live/matrix-media-repo/url_previewing/p"
 	"github.com/turt2live/matrix-media-repo/util"
 	"golang.org/x/sync/singleflight"
 )
@@ -53,68 +51,15 @@ func Execute(ctx rcontext.RequestContext, onHost string, previewUrl string, user
 
 	// Step 4: Join the singleflight queue
 	r, err, _ := sf.Do(fmt.Sprintf("%s:%s_%d/%s", onHost, previewUrl, opts.Timestamp, opts.LanguageHeader), func() (interface{}, error) {
-		payload := &m.UrlPayload{
+		// Step 5: Generate preview
+		var preview m.PreviewResult
+		preview, err = url_preview.Preview(ctx, &m.UrlPayload{
 			UrlString: previewUrl,
 			ParsedUrl: parsedUrl,
-		}
-		var preview m.PreviewResult
-		err = m.ErrPreviewUnsupported
-
-		// Step 5: Try oEmbed
-		if ctx.Config.UrlPreviews.OEmbed {
-			ctx.Log.Debug("Trying oEmbed previewer")
-			preview, err = p.GenerateOEmbedPreview(payload, opts.LanguageHeader, ctx)
-		}
-
-		// Step 6: Try OpenGraph
-		if err == m.ErrPreviewUnsupported {
-			ctx.Log.Debug("Trying OpenGraph previewer")
-			preview, err = p.GenerateOpenGraphPreview(payload, opts.LanguageHeader, ctx)
-		}
-
-		// Step 7: Try scraping
-		if err == m.ErrPreviewUnsupported {
-			ctx.Log.Debug("Trying built-in previewer")
-			preview, err = p.GenerateCalculatedPreview(payload, opts.LanguageHeader, ctx)
-		}
-
-		// Step 8: Finish processing
-		if err != nil {
-			if err == m.ErrPreviewUnsupported {
-				err = common.ErrMediaNotFound
-			}
-
-			if err == common.ErrMediaNotFound {
-				previewDb.InsertError(previewUrl, common.ErrCodeNotFound)
-			} else {
-				previewDb.InsertError(previewUrl, common.ErrCodeUnknown)
-			}
-			return nil, err
-		} else {
-			result := &database.DbUrlPreview{
-				Url:            previewUrl,
-				ErrorCode:      "",
-				BucketTs:       util.GetHourBucket(opts.Timestamp),
-				SiteUrl:        preview.Url,
-				SiteName:       preview.SiteName,
-				ResourceType:   preview.Type,
-				Description:    preview.Description,
-				Title:          preview.Title,
-				LanguageHeader: opts.LanguageHeader,
-			}
-
-			// Step 9: Store the thumbnail, if needed
-			url_preview.UploadImage(ctx, preview.Image, onHost, userId, result)
-
-			// Step 10: Insert the record
-			err = previewDb.Insert(result)
-			if err != nil {
-				ctx.Log.Warn("Non-fatal error caching URL preview: ", err)
-				sentry.CaptureException(err)
-			}
+		}, opts.LanguageHeader)
 
-			return result, nil
-		}
+		// Step 6: Finish processing
+		return url_preview.Process(ctx, previewUrl, preview, err, onHost, userId, opts.LanguageHeader, opts.Timestamp)
 	})
 	if err != nil {
 		return nil, err