From bcf7ed91c97a65eacf450ab5a1e9b890417ddf21 Mon Sep 17 00:00:00 2001 From: Travis Ralston <travpc@gmail.com> Date: Sat, 20 Jan 2018 16:55:33 -0700 Subject: [PATCH] Add a concept of global admins and a remote media purge API Fixes #5 We register our own so that the endpoint is still accessible even behind a proxy (assuming the proxy is set up correctly). --- config.sample.yaml | 6 ++ docs/admin.md | 11 +++ .../matrix-media-repo/client/r0/purge.go | 58 ++++++++++++++++ .../matrix-media-repo/cmd/media_repo/main.go | 11 ++- .../matrix-media-repo/config/config.go | 2 + .../services/media_service/media_service.go | 42 +++++++++++ .../storage/stores/media_store.go | 69 +++++++++++++++++++ .../matrix-media-repo/util/config.go | 10 +++ 8 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 docs/admin.md create mode 100644 src/github.com/turt2live/matrix-media-repo/client/r0/purge.go diff --git a/config.sample.yaml b/config.sample.yaml index 47d81610..442a4e31 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -21,6 +21,12 @@ homeservers: backoffAt: 10 # The number of consecutive failures in calling this homeserver before the # media repository will start backing off. This defaults to 10 if not given. +# These users have full access to the administrative functions of the media repository. +# See docs/admin.md for information on what these people can do. They must belong to one of the +# configured homeservers above. +admins: + - "@your_username:server.com" + # The file upload settings for the media repository uploads: maxBytes: 104857600 # 100MB default, 0 to disable diff --git a/docs/admin.md b/docs/admin.md new file mode 100644 index 00000000..53bb84b7 --- /dev/null +++ b/docs/admin.md @@ -0,0 +1,11 @@ +# Media repository administration + +All the API calls here require your user ID to be listed in the configuration as an administrator. After that, your access token for your homeserver will grant you access to these APIs. 
The URLs should be hit against a configured homeserver. For example, if you have `t2bot.io` configured as a homeserver, then the admin API can be used at `https://t2bot.io/_matrix/media/r0/admin/...`. + +## Remote media purge + +URL: `POST /_matrix/media/r0/admin/purge_remote?before_ts=1234567890&access_token=your_access_token` (`before_ts` is in milliseconds) + +This will delete remote media from the file store that was downloaded before the timestamp specified. If the file is referenced by newer remote media or local files to any of the configured homeservers, it will not be deleted. Be aware that removing a homeserver from the config will cause it to be considered a remote server, and therefore the media may be deleted. + +Any remote media that is deleted and requested by a user will be downloaded again.
diff --git a/src/github.com/turt2live/matrix-media-repo/client/r0/purge.go b/src/github.com/turt2live/matrix-media-repo/client/r0/purge.go
new file mode 100644
index 00000000..8cd7a09e
--- /dev/null
+++ b/src/github.com/turt2live/matrix-media-repo/client/r0/purge.go
@@ -0,0 +1,65 @@
+package r0
+
+import (
+	"net/http"
+	"strconv"
+
+	"github.com/sirupsen/logrus"
+	"github.com/turt2live/matrix-media-repo/client"
+	"github.com/turt2live/matrix-media-repo/matrix"
+	"github.com/turt2live/matrix-media-repo/services/media_service"
+	"github.com/turt2live/matrix-media-repo/util"
+)
+
+// MediaPurgedResponse is the response body for a completed purge; NumRemoved is
+// serialized as total_removed.
+type MediaPurgedResponse struct {
+	NumRemoved int `json:"total_removed"`
+}
+
+// PurgeRemoteMedia handles the admin request to purge remote media. The caller's
+// access token must resolve to a user ID that util.IsGlobalAdmin accepts, and the
+// before_ts query parameter must parse as a base-10 64-bit integer. On success it
+// returns a MediaPurgedResponse carrying the count reported by the media service;
+// otherwise it returns the matching client error response.
+func PurgeRemoteMedia(w http.ResponseWriter, r *http.Request, log *logrus.Entry) interface{} {
+	token := util.GetAccessTokenFromRequest(r)
+	userId, err := matrix.GetUserIdFromToken(r.Context(), r.Host, token)
+	if err != nil {
+		log.Error("Error verifying token: " + err.Error())
+		return client.AuthFailed()
+	}
+	if userId == "" {
+		return client.AuthFailed()
+	}
+
+	// Only users listed as global admins may purge media
+	if !util.IsGlobalAdmin(userId) {
+		log.Warn("User " + userId + " is not a repository administrator")
+		return client.AuthFailed()
+	}
+
+	rawBeforeTs := r.URL.Query().Get("before_ts")
+	if rawBeforeTs == "" {
+		return client.BadRequest("Missing before_ts argument")
+	}
+	beforeTs, err := strconv.ParseInt(rawBeforeTs, 10, 64)
+	if err != nil {
+		return client.BadRequest("Error parsing before_ts: " + err.Error())
+	}
+
+	log = log.WithFields(logrus.Fields{
+		"beforeTs": beforeTs,
+		"userId":   userId,
+	})
+
+	// We don't bother clearing the cache because it's still probably useful there
+	svc := media_service.New(r.Context(), log)
+	removed, err := svc.PurgeRemoteMediaBefore(beforeTs)
+	if err != nil {
+		log.Error("Error purging remote media: " + err.Error())
+		return client.InternalServerError("Error purging remote media")
+	}
+
+	return &MediaPurgedResponse{NumRemoved: removed}
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go b/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go
index a4f8f568..304a8ab5 100644
--- a/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go
+++ b/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go
@@ -69,18 +69,25 @@ func main() { thumbnailHandler := Handler{r0.ThumbnailMedia, hOpts} previewUrlHandler := Handler{r0.PreviewUrl, hOpts} identiconHandler := Handler{r0.Identicon, hOpts} + purgeHandler := Handler{r0.PurgeRemoteMedia, hOpts} routes := make(map[string]*ApiRoute) versions := []string{"r0", "v1"} // r0 is typically clients and v1 is typically servers - for i := 0; i < len(versions); i++ { - version := versions[i] + for _, version := range versions { + // Standard routes for the media repo routes["/_matrix/media/"+version+"/upload"] = &ApiRoute{"POST", uploadHandler} routes["/_matrix/media/"+version+"/download/{server:[a-zA-Z0-9.:-_]+}/{mediaId:[a-zA-Z0-9]+}"] = &ApiRoute{"GET", downloadHandler} routes["/_matrix/media/"+version+"/download/{server:[a-zA-Z0-9.:-_]+}/{mediaId:[a-zA-Z0-9]+}/{filename:[a-zA-Z0-9._-]+}"] = &ApiRoute{"GET", downloadHandler}
routes["/_matrix/media/"+version+"/thumbnail/{server:[a-zA-Z0-9.:-_]+}/{mediaId:[a-zA-Z0-9]+}"] = &ApiRoute{"GET", thumbnailHandler} routes["/_matrix/media/"+version+"/preview_url"] = &ApiRoute{"GET", previewUrlHandler} routes["/_matrix/media/"+version+"/identicon/{seed:.*}"] = &ApiRoute{"GET", identiconHandler} + + // Custom routes for the media repo + routes["/_matrix/media/"+version+"/admin/purge_remote"] = &ApiRoute{"POST", purgeHandler} + + // Routes that don't fit the normal media spec + routes["/_matrix/client/"+version+"/admin/purge_media_cache"] = &ApiRoute{"POST", purgeHandler} } for routePath, opts := range routes { diff --git a/src/github.com/turt2live/matrix-media-repo/config/config.go b/src/github.com/turt2live/matrix-media-repo/config/config.go index 6db13453..33e457be 100644 --- a/src/github.com/turt2live/matrix-media-repo/config/config.go +++ b/src/github.com/turt2live/matrix-media-repo/config/config.go @@ -91,6 +91,7 @@ type CacheConfig struct { type MediaRepoConfig struct { General *GeneralConfig `yaml:"repo"` Homeservers []*HomeserverConfig `yaml:"homeservers,flow"` + Admins []string `yaml:"admins,flow"` Database *DatabaseConfig `yaml:"database"` Uploads *UploadsConfig `yaml:"uploads"` Downloads *DownloadsConfig `yaml:"downloads"` @@ -147,6 +148,7 @@ func NewDefaultConfig() *MediaRepoConfig { Postgres: "postgres://your_username:your_password@localhost/database_name?sslmode=disable", }, Homeservers: []*HomeserverConfig{}, + Admins: []string{}, Uploads: &UploadsConfig{ MaxSizeBytes: 104857600, // 100mb StoragePaths: []string{}, diff --git a/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go b/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go index ef547077..199dc9bc 100644 --- a/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go +++ b/src/github.com/turt2live/matrix-media-repo/services/media_service/media_service.go @@ -2,6 +2,7 @@ package 
media_service import ( "context" + "fmt" "io" "os" "strconv"
@@ -62,6 +63,60 @@ func (s *mediaService) IsTooLarge(contentLength int64, contentLengthHeader strin
 	return false // We can only assume
 }
 
+// PurgeRemoteMediaBefore deletes remote media created before beforeTs. Origins
+// that util.IsServerOurs claims are passed to the store as exclusions. Each
+// file at media.Location is removed first, then its database record. A record
+// is kept when its file could not be removed and still exists, so a later
+// purge can retry instead of orphaning the file on disk. Returns the number of
+// files removed; per-item failures are logged rather than returned.
+func (s *mediaService) PurgeRemoteMediaBefore(beforeTs int64) (int, error) {
+	origins, err := s.store.GetOrigins()
+	if err != nil {
+		return 0, err
+	}
+
+	var excludedOrigins []string
+	for _, origin := range origins {
+		if util.IsServerOurs(origin) {
+			excludedOrigins = append(excludedOrigins, origin)
+		}
+	}
+
+	oldMedia, err := s.store.GetOldMedia(excludedOrigins, beforeTs)
+	if err != nil {
+		return 0, err
+	}
+
+	s.log.Info(fmt.Sprintf("Starting removal of %d remote media files (db records will also be removed)", len(oldMedia)))
+
+	removed := 0
+	for _, media := range oldMedia {
+		mediaName := media.Origin + "/" + media.MediaId
+
+		// Delete the file first
+		err = os.Remove(media.Location)
+		if err == nil {
+			removed++
+			s.log.Info("Removed remote media file: " + mediaName)
+		} else {
+			s.log.Warn("Cannot remove media " + mediaName + " because: " + err.Error())
+			if !os.IsNotExist(err) {
+				// The file is still on disk: keep the record so the media stays
+				// reachable and a later purge can try again
+				continue
+			}
+		}
+
+		// The file is gone (or never existed), so the record can go as well
+		err = s.store.Delete(media.Origin, media.MediaId)
+		if err != nil {
+			s.log.Warn("Error removing media " + mediaName + " from database: " + err.Error())
+		}
+	}
+
+	return removed, nil
+}
+
 func (s *mediaService) UploadMedia(contents io.ReadCloser, contentType string, filename string, userId string, host string) (*types.Media, error) {
 	defer contents.Close()
 	var data io.Reader
diff --git a/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go b/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
index 768c2550..03df6a5d 100644
--- a/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
+++ b/src/github.com/turt2live/matrix-media-repo/storage/stores/media_store.go
@@ -4,6 +4,7 @@ import ( "context" "database/sql" + "github.com/lib/pq" "github.com/sirupsen/logrus"
"github.com/turt2live/matrix-media-repo/types" )
@@ -11,11 +12,17 @@ import (
 const selectMedia = "SELECT origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, location, creation_ts FROM media WHERE origin = $1 and media_id = $2;"
 const selectMediaByHash = "SELECT origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, location, creation_ts FROM media WHERE sha256_hash = $1;"
 const insertMedia = "INSERT INTO media (origin, media_id, upload_name, content_type, user_id, sha256_hash, size_bytes, location, creation_ts) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9);"
+const selectOldMedia = "SELECT m.origin, m.media_id, m.upload_name, m.content_type, m.user_id, m.sha256_hash, m.size_bytes, m.location, m.creation_ts FROM media AS m WHERE NOT(m.origin = ANY($1)) AND m.creation_ts < $2 AND (SELECT COUNT(*) FROM media AS d WHERE d.sha256_hash = m.sha256_hash AND d.creation_ts >= $2) = 0 AND (SELECT COUNT(*) FROM media AS d WHERE d.sha256_hash = m.sha256_hash AND d.origin = ANY($1)) = 0;"
+const selectOrigins = "SELECT DISTINCT origin FROM media;"
+const deleteMedia = "DELETE FROM media WHERE origin = $1 AND media_id = $2;"
 
 type mediaStoreStatements struct {
 	selectMedia       *sql.Stmt
 	selectMediaByHash *sql.Stmt
 	insertMedia       *sql.Stmt
+	selectOldMedia    *sql.Stmt
+	selectOrigins     *sql.Stmt
+	deleteMedia       *sql.Stmt
 }
 
 type MediaStoreFactory struct {
@@ -45,6 +52,15 @@ func InitMediaStore(sqlDb *sql.DB) (*MediaStoreFactory, error) {
 	if store.stmts.insertMedia, err = store.sqlDb.Prepare(insertMedia); err != nil {
 		return nil, err
 	}
+	if store.stmts.selectOldMedia, err = store.sqlDb.Prepare(selectOldMedia); err != nil {
+		return nil, err
+	}
+	if store.stmts.selectOrigins, err = store.sqlDb.Prepare(selectOrigins); err != nil {
+		return nil, err
+	}
+	if store.stmts.deleteMedia, err = store.sqlDb.Prepare(deleteMedia); err != nil {
+		return nil, err
+	}
 
 	return &store, nil
 }
@@ -118,3 +134,71 @@ func (s *MediaStore) Get(origin string, mediaId string) (*types.Media, error) {
 	)
 	return m, err
 }
+
+// GetOldMedia returns the media records selected by the selectOldMedia query:
+// media created before beforeTs whose origin is not in exceptOrigins and whose
+// hash is not shared with newer media or with media from exceptOrigins.
+func (s *MediaStore) GetOldMedia(exceptOrigins []string, beforeTs int64) ([]*types.Media, error) {
+	rows, err := s.statements.selectOldMedia.QueryContext(s.ctx, pq.Array(exceptOrigins), beforeTs)
+	if err != nil {
+		return nil, err
+	}
+	// Release the connection even when a Scan error ends the loop early
+	defer rows.Close()
+
+	var results []*types.Media
+	for rows.Next() {
+		obj := &types.Media{}
+		err = rows.Scan(
+			&obj.Origin,
+			&obj.MediaId,
+			&obj.UploadName,
+			&obj.ContentType,
+			&obj.UserId,
+			&obj.Sha256Hash,
+			&obj.SizeBytes,
+			&obj.Location,
+			&obj.CreationTs,
+		)
+		if err != nil {
+			return nil, err
+		}
+		results = append(results, obj)
+	}
+	// rows.Next also returns false on failure, so surface that error here
+	if err = rows.Err(); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
+
+// GetOrigins returns every distinct origin present in the media table.
+func (s *MediaStore) GetOrigins() ([]string, error) {
+	rows, err := s.statements.selectOrigins.QueryContext(s.ctx)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var results []string
+	for rows.Next() {
+		var origin string
+		err = rows.Scan(&origin)
+		if err != nil {
+			return nil, err
+		}
+		results = append(results, origin)
+	}
+	if err = rows.Err(); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
+
+// Delete removes the media record identified by origin and mediaId.
+func (s *MediaStore) Delete(origin string, mediaId string) error {
+	_, err := s.statements.deleteMedia.ExecContext(s.ctx, origin, mediaId)
+	return err
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/util/config.go b/src/github.com/turt2live/matrix-media-repo/util/config.go
index 2fed8d6c..98f51e65 100644
--- a/src/github.com/turt2live/matrix-media-repo/util/config.go
+++ b/src/github.com/turt2live/matrix-media-repo/util/config.go
@@ -17,3 +17,15 @@ func GetHomeserverConfig(server string) (*config.HomeserverConfig) {
 
 	return nil
 }
+
+// IsGlobalAdmin reports whether userId exactly matches one of the entries in
+// the configured Admins list.
+func IsGlobalAdmin(userId string) bool {
+	for _, admin := range config.Get().Admins {
+		if admin == userId {
+			return true
+		}
+	}
+
+	return false
+}
-- GitLab