diff --git a/src/github.com/turt2live/matrix-media-repo/api/custom/datastores.go b/src/github.com/turt2live/matrix-media-repo/api/custom/datastores.go
new file mode 100644
index 0000000000000000000000000000000000000000..bc81d973252421366019b1b64b1ff446c6be0908
--- /dev/null
+++ b/src/github.com/turt2live/matrix-media-repo/api/custom/datastores.go
@@ -0,0 +1,108 @@
+package custom
+
+import (
+	"net/http"
+	"strconv"
+
+	"github.com/gorilla/mux"
+	"github.com/sirupsen/logrus"
+	"github.com/turt2live/matrix-media-repo/api"
+	"github.com/turt2live/matrix-media-repo/storage"
+	"github.com/turt2live/matrix-media-repo/util"
+)
+
+// DatastoreMigrationEstimate is the JSON body returned by
+// GetDatastoreStorageEstimate. The *Affected fields count matching records,
+// the *HashesAffected fields count distinct sha256 hashes among them, and the
+// *Bytes fields add up the size of each distinct hash once.
+type DatastoreMigrationEstimate struct {
+	ThumbnailsAffected      int64 `json:"thumbnails_affected"`
+	ThumbnailHashesAffected int64 `json:"thumbnail_hashes_affected"`
+	ThumbnailBytes          int64 `json:"thumbnail_bytes"`
+	MediaAffected           int64 `json:"media_affected"`
+	MediaHashesAffected     int64 `json:"media_hashes_affected"`
+	MediaBytes              int64 `json:"media_bytes"`
+	TotalHashesAffected     int64 `json:"total_hashes_affected"`
+	TotalBytes              int64 `json:"total_bytes"`
+}
+
+// GetDatastoreStorageEstimate reports how many media and thumbnail records in
+// the datastore named by the "datastoreId" route variable were last accessed
+// before the "before_ts" query parameter, and how many bytes they account for.
+//
+// before_ts is parsed as a base-10 int64 and defaults to util.NowMillis() when
+// it is omitted. A malformed before_ts yields api.BadRequest; a failed database
+// lookup is logged and yields api.InternalServerError.
+func GetDatastoreStorageEstimate(r *http.Request, log *logrus.Entry, user api.UserInfo) interface{} {
+	beforeTsStr := r.URL.Query().Get("before_ts")
+	beforeTs := util.NowMillis()
+	var err error
+	if beforeTsStr != "" {
+		beforeTs, err = strconv.ParseInt(beforeTsStr, 10, 64)
+		if err != nil {
+			return api.BadRequest("Error parsing before_ts: " + err.Error())
+		}
+	}
+
+	params := mux.Vars(r)
+
+	datastoreId := params["datastoreId"]
+
+	log = log.WithFields(logrus.Fields{
+		"beforeTs":    beforeTs,
+		"datastoreId": datastoreId,
+	})
+
+	estimates := &DatastoreMigrationEstimate{}
+	// seenHashes spans media and thumbnails so the totals count each hash once.
+	seenHashes := make(map[string]bool)
+	seenMediaHashes := make(map[string]bool)
+	seenThumbnailHashes := make(map[string]bool)
+
+	db := storage.GetDatabase().GetMetadataStore(r.Context(), log)
+	media, err := db.GetOldMediaInDatastore(datastoreId, beforeTs)
+	if err != nil {
+		log.Error(err)
+		return api.InternalServerError("Failed to get media from database")
+	}
+
+	for _, record := range media {
+		estimates.MediaAffected++
+
+		if _, found := seenHashes[record.Sha256Hash]; !found {
+			estimates.TotalBytes += record.SizeBytes
+			estimates.TotalHashesAffected++
+		}
+		if _, found := seenMediaHashes[record.Sha256Hash]; !found {
+			estimates.MediaBytes += record.SizeBytes
+			estimates.MediaHashesAffected++
+		}
+
+		seenHashes[record.Sha256Hash] = true
+		seenMediaHashes[record.Sha256Hash] = true
+	}
+
+	thumbnails, err := db.GetOldThumbnailsInDatastore(datastoreId, beforeTs)
+	if err != nil {
+		log.Error(err)
+		return api.InternalServerError("Failed to get thumbnails from database")
+	}
+
+	for _, record := range thumbnails {
+		estimates.ThumbnailsAffected++
+
+		if _, found := seenHashes[record.Sha256Hash]; !found {
+			estimates.TotalBytes += record.SizeBytes
+			estimates.TotalHashesAffected++
+		}
+		if _, found := seenThumbnailHashes[record.Sha256Hash]; !found {
+			estimates.ThumbnailBytes += record.SizeBytes
+			estimates.ThumbnailHashesAffected++
+		}
+
+		seenHashes[record.Sha256Hash] = true
+		seenThumbnailHashes[record.Sha256Hash] = true
+	}
+
+	return estimates
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go b/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go
index 444384d8179d1cbce0bafd7154c651d26d47af3b..1d5e2cd55d0aa5fd5b6d204d3a90379baf68ccaf 100644
--- a/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go
+++ b/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go
@@ -37,6 +37,7 @@ func Init() {
 	localCopyHandler := handler{api.AccessTokenRequiredRoute(unstable.LocalCopy), "local_copy", counter}
 	infoHandler := handler{api.AccessTokenRequiredRoute(unstable.MediaInfo), "info", counter}
 	configHandler := handler{api.AccessTokenRequiredRoute(r0.PublicConfig), "config", counter}
+	storageEstimateHandler := handler{api.RepoAdminRoute(custom.GetDatastoreStorageEstimate), "get_storage_estimate", counter}
 
 	routes := make(map[string]route)
 	versions := []string{"r0", "v1", "unstable"} // r0 is typically clients and v1 is typically servers. v1 is deprecated.
@@ -55,6 +56,7 @@ func Init() {
 		routes["/_matrix/media/"+version+"/admin/purge_remote"] = route{"POST", purgeHandler}
 		routes["/_matrix/media/"+version+"/admin/quarantine/{server:[a-zA-Z0-9.:\\-_]+}/{mediaId:[a-zA-Z0-9.\\-_]+}"] = route{"POST", quarantineHandler}
 		routes["/_matrix/media/"+version+"/admin/room/{roomId:[^/]+}/quarantine"] = route{"POST", quarantineRoomHandler}
+		routes["/_matrix/media/"+version+"/admin/datastore/{datastoreId:[^/]+}/size_estimate"] = route{"GET", storageEstimateHandler}
 
 		// Routes that we should handle but aren't in the media namespace (synapse compat)
 		routes["/_matrix/client/"+version+"/admin/purge_media_cache"] = route{"POST", purgeHandler}
diff --git a/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go b/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
index c59dddfdd68ae49e89686969de4a64345e438c70..376674b46e55e43ca18c5865b0993d4c88adf441 100644
--- a/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
+++ b/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
@@ -41,6 +41,7 @@ type mediaStoreStatements struct {
 	selectMediaWithoutDatastore     *sql.Stmt
 	updateMediaDatastoreAndLocation *sql.Stmt
 	selectAllDatastores             *sql.Stmt
+	selectMediaInDatastoreOlderThan *sql.Stmt
 }
 
 type MediaStoreFactory struct {
diff --git a/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go b/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go
index c4f200b102800b4b1753824e8b8be472bc532286..e58d969c9e48b8e261650317b4d3498a495c064a 100644
--- a/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go
+++ b/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go
@@ -5,6 +5,7 @@ import (
 	"database/sql"
 
 	"github.com/sirupsen/logrus"
+	"github.com/turt2live/matrix-media-repo/types"
 )
 
 type folderSize struct {
@@ -13,10 +14,17 @@ type folderSize struct {
 
 const selectSizeOfDatastore = "SELECT COALESCE(SUM(size_bytes), 0) + COALESCE((SELECT SUM(size_bytes) FROM thumbnails WHERE datastore_id = $1), 0) AS size_total FROM media WHERE datastore_id = $1;"
 const upsertLastAccessed = "INSERT INTO last_access (sha256_hash, last_access_ts) VALUES ($1, $2) ON CONFLICT (sha256_hash) DO UPDATE SET last_access_ts = $2"
+// The two queries below return the same six columns in the same order; every
+// Scan over their rows must list its targets in that order.
+const selectMediaLastAccessedBeforeInDatastore = "SELECT m.sha256_hash, m.size_bytes, m.location, m.datastore_id, m.creation_ts, a.last_access_ts FROM media AS m JOIN last_access AS a ON m.sha256_hash = a.sha256_hash WHERE a.last_access_ts < $1 AND m.datastore_id = $2"
+
+const selectThumbnailsLastAccessedBeforeInDatastore = "SELECT m.sha256_hash, m.size_bytes, m.location, m.datastore_id, m.creation_ts, a.last_access_ts FROM thumbnails AS m JOIN last_access AS a ON m.sha256_hash = a.sha256_hash WHERE a.last_access_ts < $1 AND m.datastore_id = $2"
 
 type metadataStoreStatements struct {
-	upsertLastAccessed    *sql.Stmt
-	selectSizeOfDatastore *sql.Stmt
+	upsertLastAccessed                            *sql.Stmt
+	selectSizeOfDatastore                         *sql.Stmt
+	selectMediaLastAccessedBeforeInDatastore      *sql.Stmt
+	selectThumbnailsLastAccessedBeforeInDatastore *sql.Stmt
 }
 
 type MetadataStoreFactory struct {
@@ -43,6 +51,12 @@ func InitMetadataStore(sqlDb *sql.DB) (*MetadataStoreFactory, error) {
 	if store.stmts.selectSizeOfDatastore, err = store.sqlDb.Prepare(selectSizeOfDatastore); err != nil {
 		return nil, err
 	}
+	if store.stmts.selectMediaLastAccessedBeforeInDatastore, err = store.sqlDb.Prepare(selectMediaLastAccessedBeforeInDatastore); err != nil {
+		return nil, err
+	}
+	if store.stmts.selectThumbnailsLastAccessedBeforeInDatastore, err = store.sqlDb.Prepare(selectThumbnailsLastAccessedBeforeInDatastore); err != nil {
+		return nil, err
+	}
 
 	return &store, nil
 }
@@ -66,3 +80,53 @@ func (s *MetadataStore) GetEstimatedSizeOfDatastore(datastoreId string) (int64,
 	err := s.statements.selectSizeOfDatastore.QueryRowContext(s.ctx, datastoreId).Scan(&r.Size)
 	return r.Size, err
 }
+
+// GetOldMediaInDatastore returns the media records stored in datastoreId whose
+// last_access_ts is strictly less than beforeTs.
+func (s *MetadataStore) GetOldMediaInDatastore(datastoreId string, beforeTs int64) ([]*types.MinimalMediaMetadata, error) {
+	rows, err := s.statements.selectMediaLastAccessedBeforeInDatastore.QueryContext(s.ctx, beforeTs, datastoreId)
+	if err != nil {
+		return nil, err
+	}
+	return scanMinimalMediaMetadata(rows)
+}
+
+// GetOldThumbnailsInDatastore returns the thumbnail records stored in
+// datastoreId whose last_access_ts is strictly less than beforeTs.
+func (s *MetadataStore) GetOldThumbnailsInDatastore(datastoreId string, beforeTs int64) ([]*types.MinimalMediaMetadata, error) {
+	rows, err := s.statements.selectThumbnailsLastAccessedBeforeInDatastore.QueryContext(s.ctx, beforeTs, datastoreId)
+	if err != nil {
+		return nil, err
+	}
+	return scanMinimalMediaMetadata(rows)
+}
+
+// scanMinimalMediaMetadata drains rows into MinimalMediaMetadata values and
+// closes rows before returning. The Scan targets follow the column order shared
+// by both *LastAccessedBeforeInDatastore queries.
+func scanMinimalMediaMetadata(rows *sql.Rows) ([]*types.MinimalMediaMetadata, error) {
+	defer rows.Close()
+
+	var results []*types.MinimalMediaMetadata
+	for rows.Next() {
+		obj := &types.MinimalMediaMetadata{}
+		err := rows.Scan(
+			&obj.Sha256Hash,
+			&obj.SizeBytes,
+			&obj.Location,
+			&obj.DatastoreId,
+			&obj.CreationTs,
+			&obj.LastAccessTs,
+		)
+		if err != nil {
+			return nil, err
+		}
+		results = append(results, obj)
+	}
+	// Next returns false on both exhaustion and failure; Err tells them apart.
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/types/media.go b/src/github.com/turt2live/matrix-media-repo/types/media.go
index b24f1c441f6c5ef2531a92e5cd73a26ed1f2ae61..ce8ea0a146cdf2d7834263805b09303cd08c98fb 100644
--- a/src/github.com/turt2live/matrix-media-repo/types/media.go
+++ b/src/github.com/turt2live/matrix-media-repo/types/media.go
@@ -26,6 +26,17 @@ type MinimalMedia struct {
 	KnownMedia *Media
 }
 
+// MinimalMediaMetadata is the row shape produced by the metadata store's
+// last-access queries for both media and thumbnails.
+type MinimalMediaMetadata struct {
+	SizeBytes    int64
+	Sha256Hash   string
+	Location     string
+	CreationTs   int64
+	LastAccessTs int64
+	DatastoreId  string
+}
+
 func (m *Media) MxcUri() string {
 	return "mxc://" + m.Origin + "/" + m.MediaId
 }