diff --git a/config.sample.yaml b/config.sample.yaml index 47d816103637801a3cf8cc450db7654222398101..442a4e31f08f81c7c00200ab807419297097686e 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -21,6 +21,12 @@ homeservers: backoffAt: 10 # The number of consecutive failures in calling this homeserver before the # media repository will start backing off. This defaults to 10 if not given. +# These users have full access to the administrative functions of the media repository. +# See docs/admin.md for information on what these people can do. They must belong to one of the +# configured homeservers above. +admins: + - "@your_username:server.com" + # The file upload settings for the media repository uploads: maxBytes: 104857600 # 100MB default, 0 to disable diff --git a/docs/admin.md b/docs/admin.md new file mode 100644 index 0000000000000000000000000000000000000000..53bb84b74b2a304cdeda665bb285a9f5b2b39253 --- /dev/null +++ b/docs/admin.md @@ -0,0 +1,11 @@ +# Media repository administration + +All the API calls here require your user ID to be listed in the configuration as an administrator. After that, your access token for your homeserver will grant you access to these APIs. The URLs should be hit against a configured homeserver. For example, if you have `t2bot.io` configured as a homeserver, then the admin API can be used at `https://t2bot.io/_matrix/media/r0/admin/...`. + +## Remote media purge + +URL: `POST /_matrix/media/r0/admin/purge_remote?before_ts=1234567890&access_token=your_access_token` (`before_ts` is in milliseconds) + +This will delete remote media from the file store that was downloaded before the timestamp specified. If the file is also referenced by newer remote media, or by local media belonging to any of the configured homeservers, it will not be deleted. Be aware that removing a homeserver from the config will cause it to be considered a remote server, and therefore its media may be deleted. 
+
+Any remote media that is deleted and requested by a user will be downloaded again.
diff --git a/src/github.com/turt2live/matrix-media-repo/client/r0/purge.go b/src/github.com/turt2live/matrix-media-repo/client/r0/purge.go
new file mode 100644
index 0000000000000000000000000000000000000000..8cd7a09e0e4b4989b2d85d05a8e0851d60169933
--- /dev/null
+++ b/src/github.com/turt2live/matrix-media-repo/client/r0/purge.go
@@ -0,0 +1,62 @@
+package r0
+
+import (
+	"net/http"
+	"strconv"
+
+	"github.com/sirupsen/logrus"
+	"github.com/turt2live/matrix-media-repo/client"
+	"github.com/turt2live/matrix-media-repo/matrix"
+	"github.com/turt2live/matrix-media-repo/services/media_service"
+	"github.com/turt2live/matrix-media-repo/util"
+)
+
+// MediaPurgedResponse is the body returned by a successful remote media purge.
+type MediaPurgedResponse struct {
+	NumRemoved int `json:"total_removed"`
+}
+
+// PurgeRemoteMedia handles the admin purge request. The caller is resolved from
+// the request's access token and must be a global admin. The required before_ts
+// query parameter is parsed as a base-10 int64 and handed to the media service;
+// the count reported by the media service is returned in the response.
+func PurgeRemoteMedia(w http.ResponseWriter, r *http.Request, log *logrus.Entry) interface{} {
+	token := util.GetAccessTokenFromRequest(r)
+	userId, err := matrix.GetUserIdFromToken(r.Context(), r.Host, token)
+	if err != nil {
+		log.Error("Error verifying token: " + err.Error())
+		return client.AuthFailed()
+	}
+	if userId == "" {
+		return client.AuthFailed()
+	}
+	if !util.IsGlobalAdmin(userId) {
+		log.Warn("User " + userId + " is not a repository administrator")
+		return client.AuthFailed()
+	}
+
+	rawTs := r.URL.Query().Get("before_ts")
+	if rawTs == "" {
+		return client.BadRequest("Missing before_ts argument")
+	}
+	beforeTs, err := strconv.ParseInt(rawTs, 10, 64)
+	if err != nil {
+		return client.BadRequest("Error parsing before_ts: " + err.Error())
+	}
+
+	fields := logrus.Fields{
+		"beforeTs": beforeTs,
+		"userId":   userId,
+	}
+	log = log.WithFields(fields)
+
+	// We don't bother clearing the cache because it's still probably useful there
+	svc := media_service.New(r.Context(), log)
+	removed, err := svc.PurgeRemoteMediaBefore(beforeTs)
+	if err != nil {
+		log.Error("Error purging remote media: " + err.Error())
+		return client.InternalServerError("Error purging remote media")
+	}
+
+	return &MediaPurgedResponse{NumRemoved: removed}
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go b/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go
index a4f8f5682d5bfa25c9903e10f83cdb1e1480b276..304a8ab5998718877413e6477ade4749c97bee45 100644
--- a/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go
+++ b/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go
@@ -69,18 +69,25 @@ func main() {
 	thumbnailHandler := Handler{r0.ThumbnailMedia, hOpts}
 	previewUrlHandler := Handler{r0.PreviewUrl, hOpts}
 	identiconHandler := Handler{r0.Identicon, hOpts}
+	purgeHandler := Handler{r0.PurgeRemoteMedia, hOpts}
 
 	routes := make(map[string]*ApiRoute)
 	versions := []string{"r0", "v1"} // r0 is typically clients and v1 is typically servers
 
-	for i := 0; i < len(versions); i++ {
-		version := versions[i]
+	for _, version := range versions {
+		// Standard routes for the media repo
 		routes["/_matrix/media/"+version+"/upload"] = &ApiRoute{"POST", uploadHandler}
 		routes["/_matrix/media/"+version+"/download/{server:[a-zA-Z0-9.:-_]+}/{mediaId:[a-zA-Z0-9]+}"] = &ApiRoute{"GET", downloadHandler}
 		routes["/_matrix/media/"+version+"/download/{server:[a-zA-Z0-9.:-_]+}/{mediaId:[a-zA-Z0-9]+}/{filename:[a-zA-Z0-9._-]+}"] = &ApiRoute{"GET", downloadHandler}
 		routes["/_matrix/media/"+version+"/thumbnail/{server:[a-zA-Z0-9.:-_]+}/{mediaId:[a-zA-Z0-9]+}"] = &ApiRoute{"GET", thumbnailHandler}
 		routes["/_matrix/media/"+version+"/preview_url"] = &ApiRoute{"GET", previewUrlHandler}
 		routes["/_matrix/media/"+version+"/identicon/{seed:.*}"] = &ApiRoute{"GET", identiconHandler}
+
+		// Custom routes for the media repo
+		routes["/_matrix/media/"+version+"/admin/purge_remote"] = &ApiRoute{"POST", purgeHandler}
+
+		// Routes that don't fit the normal media spec
+		routes["/_matrix/client/"+version+"/admin/purge_media_cache"] = &ApiRoute{"POST", purgeHandler}
 	}
 
 	for routePath, 
opts := range routes {
diff --git a/src/github.com/turt2live/matrix-media-repo/config/config.go b/src/github.com/turt2live/matrix-media-repo/config/config.go
index 6db134538d599b789ecb6b6776df348539d26ac7..33e457be34a4159785a09e0c87bb5fca2a78cfde 100644
--- a/src/github.com/turt2live/matrix-media-repo/config/config.go
+++ b/src/github.com/turt2live/matrix-media-repo/config/config.go
@@ -91,6 +91,7 @@ type CacheConfig struct {
 type MediaRepoConfig struct {
 	General     *GeneralConfig      `yaml:"repo"`
 	Homeservers []*HomeserverConfig `yaml:"homeservers,flow"`
+	Admins      []string            `yaml:"admins,flow"`
 	Database    *DatabaseConfig     `yaml:"database"`
 	Uploads     *UploadsConfig      `yaml:"uploads"`
 	Downloads   *DownloadsConfig    `yaml:"downloads"`
@@ -147,6 +148,7 @@ func NewDefaultConfig() *MediaRepoConfig {
 			Postgres: "postgres://your_username:your_password@localhost/database_name?sslmode=disable",
 		},
 		Homeservers: []*HomeserverConfig{},
+		Admins:      []string{},
 		Uploads: &UploadsConfig{
 			MaxSizeBytes: 104857600, // 100mb
 			StoragePaths: []string{},
diff --git a/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go b/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go
index ef54707706f0d94dbddf65f936a8b6a63635a545..199dc9bc8e3289ee65461990d568d9d5cd47f0b5 100644
--- a/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go
+++ b/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go
@@ -2,6 +2,7 @@ package media_service
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"os"
 	"strconv"
@@ -62,6 +63,55 @@ func (s *mediaService) IsTooLarge(contentLength int64, contentLengthHeader strin
 	return false // We can only assume
 }
 
+// PurgeRemoteMediaBefore deletes the stored files of remote media created
+// before beforeTs and then drops their database records. Origins known to the
+// store for which util.IsServerOurs reports true are excluded from the purge.
+// It returns the number of files that were removed from disk; failures on
+// individual items are logged and do not abort the run.
+func (s *mediaService) PurgeRemoteMediaBefore(beforeTs int64) (int, error) {
+	origins, err := s.store.GetOrigins()
+	if err != nil {
+		return 0, err
+	}
+
+	// Keep this non-nil even when no origin is ours: a nil slice handed to
+	// pq.Array is sent as NULL, which makes the origin filter match no rows.
+	excludedOrigins := make([]string, 0, len(origins))
+	for _, origin := range origins {
+		if util.IsServerOurs(origin) {
+			excludedOrigins = append(excludedOrigins, origin)
+		}
+	}
+
+	oldMedia, err := s.store.GetOldMedia(excludedOrigins, beforeTs)
+	if err != nil {
+		return 0, err
+	}
+
+	s.log.Info(fmt.Sprintf("Starting removal of %d remote media files (db records will be removed as well)", len(oldMedia)))
+
+	removed := 0
+	for _, media := range oldMedia {
+		// Delete the file first
+		err = os.Remove(media.Location)
+		if err != nil {
+			s.log.Warn("Cannot remove media " + media.Origin + "/" + media.MediaId + " because: " + err.Error())
+		} else {
+			removed++
+			s.log.Info("Removed remote media file: " + media.Origin + "/" + media.MediaId)
+		}
+
+		// Try to remove the record from the database now. This happens even when
+		// the file could not be removed above.
+		err = s.store.Delete(media.Origin, media.MediaId)
+		if err != nil {
+			s.log.Warn("Error removing media " + media.Origin + "/" + media.MediaId + " from database: " + err.Error())
+		}
+	}
+
+	return removed, nil
+}
+
 func (s *mediaService) UploadMedia(contents io.ReadCloser, contentType string, filename string, userId string, host string) (*types.Media, error) {
 	defer contents.Close()
 	var data io.Reader
diff --git a/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go b/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
index 768c2550cf6478fe214f45252d2344d2b40bf2de..03df6a5dc119d621d26be22045b04551ef372a5e 100644
--- a/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
+++ b/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"database/sql"
 
+	"github.com/lib/pq"
 	"github.com/sirupsen/logrus"
 	"github.com/turt2live/matrix-media-repo/types"
 )
@@ -11,11 +12,17 @@ import (
 const selectMedia = "SELECT origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, location, creation_ts FROM media WHERE origin = $1 and media_id = $2;"
 const selectMediaByHash = "SELECT origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, location, creation_ts FROM media WHERE sha256_hash 
= $1;"
 const insertMedia = "INSERT INTO media (origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, location, creation_ts) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9);"
+const selectOldMedia = "SELECT m.origin, m.media_id, m.upload_name, m.content_type, m.user_id, m.sha256_hash, m.size_bytes, m.location, m.creation_ts FROM media AS m WHERE NOT(m.origin = ANY($1)) AND m.creation_ts < $2 AND (SELECT COUNT(*) FROM media AS d WHERE d.sha256_hash = m.sha256_hash AND d.creation_ts >= $2) = 0 AND (SELECT COUNT(*) FROM media AS d WHERE d.sha256_hash = m.sha256_hash AND d.origin = ANY($1)) = 0;"
+const selectOrigins = "SELECT DISTINCT origin FROM media;"
+const deleteMedia = "DELETE FROM media WHERE origin = $1 AND media_id = $2;"
 
 type mediaStoreStatements struct {
 	selectMedia       *sql.Stmt
 	selectMediaByHash *sql.Stmt
 	insertMedia       *sql.Stmt
+	selectOldMedia    *sql.Stmt
+	selectOrigins     *sql.Stmt
+	deleteMedia       *sql.Stmt
 }
 
 type MediaStoreFactory struct {
@@ -45,6 +52,15 @@ func InitMediaStore(sqlDb *sql.DB) (*MediaStoreFactory, error) {
 	if store.stmts.insertMedia, err = store.sqlDb.Prepare(insertMedia); err != nil {
 		return nil, err
 	}
+	if store.stmts.selectOldMedia, err = store.sqlDb.Prepare(selectOldMedia); err != nil {
+		return nil, err
+	}
+	if store.stmts.selectOrigins, err = store.sqlDb.Prepare(selectOrigins); err != nil {
+		return nil, err
+	}
+	if store.stmts.deleteMedia, err = store.sqlDb.Prepare(deleteMedia); err != nil {
+		return nil, err
+	}
 
 	return &store, nil
 }
@@ -118,3 +134,77 @@ func (s *MediaStore) Get(origin string, mediaId string) (*types.Media, error) {
 	)
 	return m, err
 }
+
+// GetOldMedia returns the media created before beforeTs whose origin is not in
+// exceptOrigins. The query also leaves out any media whose sha256 hash is shared
+// with media created at or after beforeTs, or with media from one of
+// exceptOrigins.
+func (s *MediaStore) GetOldMedia(exceptOrigins []string, beforeTs int64) ([]*types.Media, error) {
+	// pq.Array encodes a nil slice as NULL, and "origin = ANY(NULL)" is NULL, so
+	// the query would match nothing. Send an empty array instead.
+	if exceptOrigins == nil {
+		exceptOrigins = []string{}
+	}
+
+	rows, err := s.statements.selectOldMedia.QueryContext(s.ctx, pq.Array(exceptOrigins), beforeTs)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var results []*types.Media
+	for rows.Next() {
+		obj := &types.Media{}
+		err = rows.Scan(
+			&obj.Origin,
+			&obj.MediaId,
+			&obj.UploadName,
+			&obj.ContentType,
+			&obj.UserId,
+			&obj.Sha256Hash,
+			&obj.SizeBytes,
+			&obj.Location,
+			&obj.CreationTs,
+		)
+		if err != nil {
+			return nil, err
+		}
+		results = append(results, obj)
+	}
+	// rows.Next also returns false when iteration fails part-way through.
+	if err = rows.Err(); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
+
+// GetOrigins returns the distinct origin values found in the media table.
+func (s *MediaStore) GetOrigins() ([]string, error) {
+	rows, err := s.statements.selectOrigins.QueryContext(s.ctx)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var results []string
+	for rows.Next() {
+		obj := ""
+		err = rows.Scan(&obj)
+		if err != nil {
+			return nil, err
+		}
+		results = append(results, obj)
+	}
+	if err = rows.Err(); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
+
+// Delete removes the media record identified by origin and mediaId.
+func (s *MediaStore) Delete(origin string, mediaId string) error {
+	_, err := s.statements.deleteMedia.ExecContext(s.ctx, origin, mediaId)
+	return err
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/util/config.go b/src/github.com/turt2live/matrix-media-repo/util/config.go
index 2fed8d6cfa7718970136be780758bdc0d467e0ce..98f51e654f1e9bb6b5adba5994587b4d51bd7311 100644
--- a/src/github.com/turt2live/matrix-media-repo/util/config.go
+++ b/src/github.com/turt2live/matrix-media-repo/util/config.go
@@ -17,3 +17,15 @@ func GetHomeserverConfig(server string) (*config.HomeserverConfig) {
 
 	return nil
 }
+
+// IsGlobalAdmin reports whether userId exactly matches one of the admins listed
+// in the repository configuration.
+func IsGlobalAdmin(userId string) bool {
+	for _, admin := range config.Get().Admins {
+		if admin == userId {
+			return true
+		}
+	}
+
+	return false
+}