diff --git a/.gitignore b/.gitignore index 0edfa988adef3dd6e08aa4a2a4bd76075b61f46f..2f5d5ac5e61f7006b0111e2674e7c7bac58ffaa8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ assets.bin.go media-repo*.yaml homeserver.yaml +s3-probably-safe-to-delete.txt # Binaries for programs and plugins *.exe diff --git a/Dockerfile b/Dockerfile index 624892e4f7c23cf54f73f85ca4798555239b2b56..0bffcc747332962f7bb3cf597a6c680df8cdaad1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ FROM alpine RUN mkdir /plugins COPY --from=builder /opt/bin/plugin_antispam_ocr /plugins/ -COPY --from=builder /opt/bin/media_repo /opt/bin/import_synapse /opt/bin/gdpr_export /opt/bin/gdpr_import /usr/local/bin/ +COPY --from=builder /opt/bin/media_repo /opt/bin/import_synapse /opt/bin/export_synapse_for_import /opt/bin/gdpr_export /opt/bin/gdpr_import /opt/bin/s3_consistency_check /usr/local/bin/ RUN apk add --no-cache \ su-exec \ diff --git a/cmd/s3_consistency_check/main.go b/cmd/s3_consistency_check/main.go new file mode 100644 index 0000000000000000000000000000000000000000..e9162a975a2a8e60bf6017cdb8622bc3f43e8742 --- /dev/null +++ b/cmd/s3_consistency_check/main.go @@ -0,0 +1,116 @@ +package main + +import ( + "flag" + "fmt" + "github.com/sirupsen/logrus" + "github.com/turt2live/matrix-media-repo/common/assets" + "github.com/turt2live/matrix-media-repo/common/config" + "github.com/turt2live/matrix-media-repo/common/logging" + "github.com/turt2live/matrix-media-repo/common/rcontext" + "github.com/turt2live/matrix-media-repo/common/runtime" + "github.com/turt2live/matrix-media-repo/storage" + "github.com/turt2live/matrix-media-repo/storage/datastore" + "os" +) + +func main() { + configPath := flag.String("config", "media-repo.yaml", "The path to the configuration") + datastoreId := flag.String("datastoreId", "", "The datastore ID to check (must be an S3 datastore)") + outFile := flag.String("outFile", "./s3-probably-safe-to-delete.txt", "File path for where to write results") + migrationsPath := flag.String("migrations", config.DefaultMigrationsPath, "The absolute path for the migrations folder") + templatesPath := flag.String("templates", config.DefaultTemplatesPath, "The absolute path for the templates folder") + flag.Parse() + + // Override config path with config for Docker users + configEnv := os.Getenv("REPO_CONFIG") + if configEnv != "" { + configPath = &configEnv + } + + config.Path = *configPath + assets.SetupMigrations(*migrationsPath) + assets.SetupTemplates(*templatesPath) + + var err error + err = logging.Setup( + config.Get().General.LogDirectory, + config.Get().General.LogColors, + config.Get().General.JsonLogs, + config.Get().General.LogLevel, + ) + if err != nil { + panic(err) + } + + logrus.Info("Starting up...") + runtime.RunStartupSequence() + + logrus.Info("Scanning datastore: ", *datastoreId) + ctx := rcontext.Initial().LogWithFields(logrus.Fields{"datastoreId": *datastoreId}) + ds, err := datastore.LocateDatastore(ctx, *datastoreId) + if err != nil { + panic(err) + } + + objectIds, err := ds.ListObjectIds(ctx) + if err != nil { + panic(err) + } + logrus.Infof("Got %d object IDs", len(objectIds)) + + mediaDb := storage.GetDatabase().GetMediaStore(ctx) + thumbsDb := storage.GetDatabase().GetThumbnailStore(ctx) + usedLocations := make(map[string]bool) + + logrus.Info("Scanning media for datastore: ", *datastoreId) + locations, err := mediaDb.GetDistinctLocationsForDatastore(*datastoreId) + if err != nil { + panic(err) + } + + for _, l := range locations { + usedLocations[l] = true + } + + logrus.Infof("Got %d locations", len(locations)) + + logrus.Info("Scanning thumbnails for datastore: ", *datastoreId) + locations, err = thumbsDb.GetDistinctLocationsForDatastore(*datastoreId) + if err != nil { + panic(err) + } + + for _, l := range locations { + usedLocations[l] = true + } + + logrus.Infof("Got %d locations", len(locations)) + + logrus.Info("Comparing locations known in DB to S3...") + probablyAbleToDelete := make([]string, 0) + for _, s3Location := range objectIds { + keep := usedLocations[s3Location] + if keep { + continue + } + logrus.Warnf("%s might be safe to delete from s3", s3Location) + probablyAbleToDelete = append(probablyAbleToDelete, s3Location) + } + + logrus.Info("Writing file for probably-safe-to-delete object IDs...") + f, err := os.Create(*outFile) + defer f.Close() + if err != nil { + panic(err) + } + outString := "" + for _, id := range probablyAbleToDelete { + outString += fmt.Sprintf("%s\n", id) + } + _, err = f.WriteString(outString) + if err != nil { + panic(err) + } + logrus.Info("Done!") +} diff --git a/controllers/thumbnail_controller/thumbnail_resource_handler.go b/controllers/thumbnail_controller/thumbnail_resource_handler.go index 7d084f9e3160477f142c609d2eb6c689d3f579a4..029b8ea19e70b43f3b993359d7e5a9cf24dda00d 100644 --- a/controllers/thumbnail_controller/thumbnail_resource_handler.go +++ b/controllers/thumbnail_controller/thumbnail_resource_handler.go @@ -97,7 +97,6 @@ func thumbnailWorkFn(request *resource_handler.WorkRequest) (resp *thumbnailResp return &thumbnailResponse{err: err} } - if info.animated != generated.Animated { ctx.Log.Warn("Animation state changed to ", generated.Animated) diff --git a/controllers/upload_controller/upload_controller.go b/controllers/upload_controller/upload_controller.go index bde49d608e1c6270a6a5c396eafa9dc57d2e6efe..7936faa7dc038bf31ad44dbedb6f4d5b2af61ad0 100644 --- a/controllers/upload_controller/upload_controller.go +++ b/controllers/upload_controller/upload_controller.go @@ -233,7 +233,11 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in db := storage.GetDatabase().GetMediaStore(ctx) records, err := db.GetByHash(info.Sha256Hash) if err != nil { - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } return nil, err } @@ -251,7 +255,13 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in } if record.UserId == userId && record.Origin == origin && record.ContentType == contentType { ctx.Log.Info("User has already uploaded this media before - returning unaltered media record") - ds.DeleteObject(info.Location) // delete temp object + + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } + trackUploadAsLastAccess(ctx, record) return record, nil } @@ -260,14 +270,22 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in err = checkSpam(contentBytes, filename, contentType, userId, origin, mediaId) if err != nil { - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } return nil, err } // We'll use the location from the first record record := records[0] if record.Quarantined { - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } ctx.Log.Warn("User attempted to upload quarantined content - rejecting") return nil, common.ErrMediaQuarantined } @@ -276,7 +294,11 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in for _, knownRecord := range records { if knownRecord.Origin == origin && knownRecord.MediaId == mediaId { ctx.Log.Info("Duplicate media record found - returning unaltered record") - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } trackUploadAsLastAccess(ctx, knownRecord) return knownRecord, nil } @@ -292,7 +314,11 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in err = db.Insert(media) if err != nil { - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } return nil, err } @@ -301,7 +327,11 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in if media.DatastoreId != ds.DatastoreId && media.Location != info.Location { ds2, err := datastore.LocateDatastore(ctx, media.DatastoreId) if err != nil { - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } return nil, err } if !ds2.ObjectExists(media.Location) { @@ -310,10 +340,22 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in return nil, err } - ds2.OverwriteObject(media.Location, stream, ctx) - ds.DeleteObject(info.Location) + err2 := ds2.OverwriteObject(media.Location, stream, ctx) + if err2 != nil { + ctx.Log.Warn("Error overwriting object", err2) + sentry.CaptureException(err2) + } + err2 = ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } } else { - ds.DeleteObject(info.Location) + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } } } @@ -324,13 +366,21 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in // The media doesn't already exist - save it as new if info.SizeBytes <= 0 { - ds.DeleteObject(info.Location) + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } return nil, errors.New("file has no contents") } err = checkSpam(contentBytes, filename, contentType, userId, origin, mediaId) if err != nil { - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } return nil, err } @@ -351,7 +401,11 @@ func StoreDirect(f *AlreadyUploadedFile, contents io.ReadCloser, expectedSize in err = db.Insert(media) if err != nil { - ds.DeleteObject(info.Location) // delete temp object + err2 := ds.DeleteObject(info.Location) // delete temp object + if err2 != nil { + ctx.Log.Warn("Error deleting temporary upload", err2) + sentry.CaptureException(err2) + } return nil, err } diff --git a/storage/datastore/datastore_ref.go b/storage/datastore/datastore_ref.go index f3b8ee88188672b5317b91ec0645e662e84dd25c..a4e45d2adaaa212627a053700f96761ad0b947e2 100644 --- a/storage/datastore/datastore_ref.go +++ b/storage/datastore/datastore_ref.go @@ -128,3 +128,15 @@ func (d *DatastoreRef) OverwriteObject(location string, stream io.ReadCloser, ct return errors.New("unknown datastore type") } } + +func (d *DatastoreRef) ListObjectIds(ctx rcontext.RequestContext) ([]string, error) { + if d.Type == "s3" { + s3, err := ds_s3.GetOrCreateS3Datastore(d.DatastoreId, d.config) + if err != nil { + return nil, err + } + return s3.ListObjects() + } else { + return nil, errors.New("cannot list objects in this datastore type") + } +} diff --git a/storage/datastore/ds_s3/s3_store.go b/storage/datastore/ds_s3/s3_store.go index 9140a2cce7133c7d6941d0977c050b6f7e6cef98..2ba4dfaa3ba2ca18259840e33bffc7ee9df38ac6 100644 --- a/storage/datastore/ds_s3/s3_store.go +++ b/storage/datastore/ds_s3/s3_store.go @@ -21,12 +21,12 @@ import ( var stores = make(map[string]*s3Datastore) type s3Datastore struct { - conf config.DatastoreConfig - dsId string - client *minio.Client - bucket string - region string - tempPath string + conf config.DatastoreConfig + dsId string + client *minio.Client + bucket string + region string + tempPath string storageClass string } @@ -71,12 +71,12 @@ func GetOrCreateS3Datastore(dsId string, conf config.DatastoreConfig) (*s3Datast } s3ds := &s3Datastore{ - conf: conf, - dsId: dsId, - client: s3client, - bucket: bucket, - region: region, - tempPath: tempPath, + conf: conf, + dsId: dsId, + client: s3client, + bucket: bucket, + region: region, + tempPath: tempPath, storageClass: storageClass, } stores[dsId] = s3ds @@ -233,3 +233,13 @@ func (s *s3Datastore) OverwriteObject(location string, stream io.ReadCloser) err _, err := s.client.PutObject(s.bucket, location, stream, -1, minio.PutObjectOptions{StorageClass: s.storageClass}) return err } + +func (s *s3Datastore) ListObjects() ([]string, error) { + doneCh := make(chan struct{}) + defer close(doneCh) + list := make([]string, 0) + for message := range s.client.ListObjectsV2(s.bucket, "", true, doneCh) { + list = append(list, message.Key) + } + return list, nil +} diff --git a/storage/stores/media_store.go b/storage/stores/media_store.go index cf00064cf6216497bf1149b20b511a5d386c26d4..481185e556afb6cf9fb432be746dc20beb18c0ef 100644 --- a/storage/stores/media_store.go +++ b/storage/stores/media_store.go @@ -32,35 +32,37 @@ const selectMediaByUserBefore = "SELECT origin, media_id, upload_name, content_t const selectMediaByDomainBefore = "SELECT origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, datastore_id, location, creation_ts, quarantined FROM media WHERE origin = $1 AND creation_ts <= $2" const selectMediaByLocation = "SELECT origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, datastore_id, location, creation_ts, quarantined FROM media WHERE datastore_id = $1 AND location = $2" const selectIfQuarantined = "SELECT 1 FROM media WHERE sha256_hash = $1 AND quarantined = $2 LIMIT 1;" +const selectMediaLocationsForDatastore = "SELECT distinct location FROM media WHERE datastore_id = $1;" var dsCacheByPath = sync.Map{} // [string] => Datastore var dsCacheById = sync.Map{} // [string] => Datastore type mediaStoreStatements struct { - selectMedia *sql.Stmt - selectMediaByHash *sql.Stmt - insertMedia *sql.Stmt - selectOldMedia *sql.Stmt - selectOrigins *sql.Stmt - deleteMedia *sql.Stmt - updateQuarantined *sql.Stmt - selectDatastore *sql.Stmt - selectDatastoreByUri *sql.Stmt - insertDatastore *sql.Stmt - selectMediaWithoutDatastore *sql.Stmt - updateMediaDatastoreAndLocation *sql.Stmt - selectAllDatastores *sql.Stmt - selectMediaInDatastoreOlderThan *sql.Stmt - selectAllMediaForServer *sql.Stmt - selectAllMediaForServerUsers *sql.Stmt - selectAllMediaForServerIds *sql.Stmt - selectQuarantinedMedia *sql.Stmt - selectServerQuarantinedMedia *sql.Stmt - selectMediaByUser *sql.Stmt - selectMediaByUserBefore *sql.Stmt - selectMediaByDomainBefore *sql.Stmt - selectMediaByLocation *sql.Stmt - selectIfQuarantined *sql.Stmt + selectMedia *sql.Stmt + selectMediaByHash *sql.Stmt + insertMedia *sql.Stmt + selectOldMedia *sql.Stmt + selectOrigins *sql.Stmt + deleteMedia *sql.Stmt + updateQuarantined *sql.Stmt + selectDatastore *sql.Stmt + selectDatastoreByUri *sql.Stmt + insertDatastore *sql.Stmt + selectMediaWithoutDatastore *sql.Stmt + updateMediaDatastoreAndLocation *sql.Stmt + selectAllDatastores *sql.Stmt + selectMediaInDatastoreOlderThan *sql.Stmt + selectAllMediaForServer *sql.Stmt + selectAllMediaForServerUsers *sql.Stmt + selectAllMediaForServerIds *sql.Stmt + selectQuarantinedMedia *sql.Stmt + selectServerQuarantinedMedia *sql.Stmt + selectMediaByUser *sql.Stmt + selectMediaByUserBefore *sql.Stmt + selectMediaByDomainBefore *sql.Stmt + selectMediaByLocation *sql.Stmt + selectIfQuarantined *sql.Stmt + selectMediaLocationsForDatastore *sql.Stmt } type MediaStoreFactory struct { @@ -149,6 +151,9 @@ func InitMediaStore(sqlDb *sql.DB) (*MediaStoreFactory, error) { if store.stmts.selectIfQuarantined, err = store.sqlDb.Prepare(selectIfQuarantined); err != nil { return nil, err } + if store.stmts.selectMediaLocationsForDatastore, err = store.sqlDb.Prepare(selectMediaLocationsForDatastore); err != nil { + return nil, err + } return &store, nil } @@ -719,3 +724,22 @@ func (s *MediaStore) IsQuarantined(sha256hash string) (bool, error) { } return true, nil } + +func (s *MediaStore) GetDistinctLocationsForDatastore(datastoreId string) ([]string, error) { + rows, err := s.statements.selectMediaLocationsForDatastore.QueryContext(s.ctx, datastoreId) + if err != nil { + return nil, err + } + + locations := make([]string, 0) + for rows.Next() { + s := "" + err = rows.Scan(&s) + if err != nil { + return nil, err + } + locations = append(locations, s) + } + + return locations, nil +} diff --git a/storage/stores/thumbnail_store.go b/storage/stores/thumbnail_store.go index e9fcccc8e6531661594ccde8118d4cd58ef6fd08..65f489b15b2f31da51f1b8a1bd59449bebc525c4 100644 --- a/storage/stores/thumbnail_store.go +++ b/storage/stores/thumbnail_store.go @@ -17,18 +17,20 @@ const selectThumbnailsForMedia = "SELECT origin, media_id, width, height, method const deleteThumbnailsForMedia = "DELETE FROM thumbnails WHERE origin = $1 AND media_id = $2;" const selectThumbnailsCreatedBefore = "SELECT origin, media_id, width, height, method, animated, content_type, size_bytes, datastore_id, location, creation_ts, sha256_hash FROM thumbnails WHERE creation_ts < $1;" const deleteThumbnailsWithHash = "DELETE FROM thumbnails WHERE sha256_hash = $1;" +const selectThumbnailLocationsForDatastore = "SELECT distinct location FROM thumbnails WHERE datastore_id = $1;" type thumbnailStatements struct { - selectThumbnail *sql.Stmt - insertThumbnail *sql.Stmt - updateThumbnailHash *sql.Stmt - selectThumbnailsWithoutHash *sql.Stmt - selectThumbnailsWithoutDatastore *sql.Stmt - updateThumbnailDatastoreAndLocation *sql.Stmt - selectThumbnailsForMedia *sql.Stmt - deleteThumbnailsForMedia *sql.Stmt - selectThumbnailsCreatedBefore *sql.Stmt - deleteThumbnailsWithHash *sql.Stmt + selectThumbnail *sql.Stmt + insertThumbnail *sql.Stmt + updateThumbnailHash *sql.Stmt + selectThumbnailsWithoutHash *sql.Stmt + selectThumbnailsWithoutDatastore *sql.Stmt + updateThumbnailDatastoreAndLocation *sql.Stmt + selectThumbnailsForMedia *sql.Stmt + deleteThumbnailsForMedia *sql.Stmt + selectThumbnailsCreatedBefore *sql.Stmt + deleteThumbnailsWithHash *sql.Stmt + selectThumbnailLocationsForDatastore *sql.Stmt } type ThumbnailStoreFactory struct { @@ -78,6 +80,9 @@ func InitThumbnailStore(sqlDb *sql.DB) (*ThumbnailStoreFactory, error) { if store.stmts.deleteThumbnailsWithHash, err = store.sqlDb.Prepare(deleteThumbnailsWithHash); err != nil { return nil, err } + if store.stmts.selectThumbnailLocationsForDatastore, err = store.sqlDb.Prepare(selectThumbnailLocationsForDatastore); err != nil { + return nil, err + } return &store, nil } @@ -303,3 +308,22 @@ func (s *ThumbnailStore) DeleteWithHash(sha256hash string) error { } return nil } + +func (s *ThumbnailStore) GetDistinctLocationsForDatastore(datastoreId string) ([]string, error) { + rows, err := s.statements.selectThumbnailLocationsForDatastore.QueryContext(s.ctx, datastoreId) + if err != nil { + return nil, err + } + + locations := make([]string, 0) + for rows.Next() { + s := "" + err = rows.Scan(&s) + if err != nil { + return nil, err + } + locations = append(locations, s) + } + + return locations, nil +}