diff --git a/config.sample.yaml b/config.sample.yaml index f4be43712b2ca35ce0ebe256c096d29bc7bba807..38ef9bda4906727f0de469cf1e8cba0c04ce84cd 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -26,6 +26,27 @@ homeservers: admins: - "@your_username:example.org" +# Datastores are places where media should be persisted. This isn't dedicated for just uploads: +# thumbnails and other misc data is also stored in these places. When the media repo is looking +# to store new media (such as user uploads, thumbnails, etc) it will look for a datastore which +# is flagged as forUploads. It will try to use the smallest datastore first. If you enable an +# s3 datastore for uploads, it will always be chosen. +datastores: + - type: file + enabled: false # Enable this to set up data storage. + forUploads: true + opts: + path: /var/matrix/media + - type: s3 + enabled: false + forUploads: false # Enable this to put uploads in s3 + opts: + endpoint: sfo2.digitaloceanspaces.com + accessKeyId: "" + accessSecret: "" + ssl: true + bucketName: "your-media-bucket" + # The file upload settings for the media repository uploads: maxBytes: 104857600 # 100MB default, 0 to disable @@ -43,26 +64,10 @@ uploads: # This is intended for larger deployments where media should be distributed among other # directories, drives, servers, etc. For smaller deployments, a single entry in this list # is recommended. - # DEPRECATED. + # DEPRECATED: Use datastores instead. #storagePaths: # - /var/matrix/media - datastores: - - type: file - enabled: false # Enable this to set up data storage. - priority: 1 - opts: - path: /var/matrix/media - - type: s3 - enabled: false - priority: 1 - opts: - endpoint: sfo2.digitaloceanspaces.com - accessKeyId: "" - accessSecret: "" - ssl: true - bucketName: "your-media-bucket" - # An optional list of file types that are allowed to be uploaded. If */* or nothing is # supplied here, then all file types are allowed. Asterisks (*) are wildcards and can be # placed anywhere to match everything (eg: "image/*" matches all images). This will also diff --git a/src/github.com/turt2live/matrix-media-repo/api/r0/upload.go b/src/github.com/turt2live/matrix-media-repo/api/r0/upload.go index aff46a286d558e256eacb14c45b57084621fb577..aa1274a23c27c039d2b254ce7d300be771f08e42 100644 --- a/src/github.com/turt2live/matrix-media-repo/api/r0/upload.go +++ b/src/github.com/turt2live/matrix-media-repo/api/r0/upload.go @@ -17,6 +17,7 @@ type MediaUploadedResponse struct { func UploadMedia(r *http.Request, log *logrus.Entry, user api.UserInfo) interface{} { filename := r.URL.Query().Get("filename") + defer r.Body.Close() log = log.WithFields(logrus.Fields{ "filename": filename, @@ -29,20 +30,17 @@ func UploadMedia(r *http.Request, log *logrus.Entry, user api.UserInfo) interfac if upload_controller.IsRequestTooLarge(r.ContentLength, r.Header.Get("Content-Length")) { io.Copy(ioutil.Discard, r.Body) // Ditch the entire request - defer r.Body.Close() return api.RequestTooLarge() } if upload_controller.IsRequestTooSmall(r.ContentLength, r.Header.Get("Content-Length")) { io.Copy(ioutil.Discard, r.Body) // Ditch the entire request - defer r.Body.Close() return api.RequestTooSmall() } media, err := upload_controller.UploadMedia(r.Body, contentType, filename, user.UserId, r.Host, r.Context(), log) if err != nil { io.Copy(ioutil.Discard, r.Body) // Ditch the entire request - defer r.Body.Close() if err == common.ErrMediaNotAllowed { return api.BadRequest("Media content type not allowed on this server") diff --git a/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go b/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go index ca549aa80e062e982db6f7706069a10738bff2eb..444384d8179d1cbce0bafd7154c651d26d47af3b 100644 --- a/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go +++ b/src/github.com/turt2live/matrix-media-repo/api/webserver/webserver.go @@ -51,12 +51,12 @@ func Init() { routes["/_matrix/media/"+version+"/identicon/{seed:.*}"] = route{"GET", identiconHandler} routes["/_matrix/media/"+version+"/config"] = route{"GET", configHandler} - // Routes that we define but are not part of the spec + // Routes that we define but are not part of the spec (management) routes["/_matrix/media/"+version+"/admin/purge_remote"] = route{"POST", purgeHandler} routes["/_matrix/media/"+version+"/admin/quarantine/{server:[a-zA-Z0-9.:\\-_]+}/{mediaId:[a-zA-Z0-9.\\-_]+}"] = route{"POST", quarantineHandler} routes["/_matrix/media/"+version+"/admin/room/{roomId:[^/]+}/quarantine"] = route{"POST", quarantineRoomHandler} - // Routes that we should handle but aren't in the media namespace + // Routes that we should handle but aren't in the media namespace (synapse compat) routes["/_matrix/client/"+version+"/admin/purge_media_cache"] = route{"POST", purgeHandler} routes["/_matrix/client/"+version+"/admin/quarantine_media/{roomId:[^/]+}"] = route{"POST", quarantineRoomHandler} diff --git a/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go b/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go index b3d0a79bfcdb99ef25ef88ea61dc5c71f646d4c1..5fd309af92f523bcc262fb30beb6e74c34cacf41 100644 --- a/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go +++ b/src/github.com/turt2live/matrix-media-repo/cmd/media_repo/main.go @@ -11,6 +11,7 @@ import ( "github.com/turt2live/matrix-media-repo/common/logging" "github.com/turt2live/matrix-media-repo/metrics" "github.com/turt2live/matrix-media-repo/storage" + "github.com/turt2live/matrix-media-repo/storage/datastore" ) func main() { @@ -30,30 +31,13 @@ func main() { logrus.Info("Initializing datastores...") enabledDatastores := 0 - for _, ds:=range config.Get().Uploads.DataStores { + for _, ds := range config.Get().DataStores { if !ds.Enabled { continue } enabledDatastores++ - - uri := "" - if ds.Type == "file" { - path, pathFound := ds.Options["path"] - if !pathFound { - logrus.Fatal("Missing 'path' on file datastore") - } - uri = path - } else if ds.Type == "s3" { - endpoint, epFound := ds.Options["endpoint"] - bucket, bucketFound := ds.Options["bucketName"] - if !epFound || !bucketFound { - logrus.Fatal("Missing 'endpoint' or 'bucketName' on s3 datastore") - } - uri = fmt.Sprintf("s3://%s/%s", endpoint, bucket) - } else { - logrus.Fatal("Unknown datastore type: ", ds.Type) - } + uri := datastore.GetUriForDatastore(ds) _, err := storage.GetOrCreateDatastoreOfType(context.TODO(), &logrus.Entry{}, ds.Type, uri) if err != nil { @@ -71,6 +55,10 @@ func main() { logrus.Info(fmt.Sprintf("\t%s (%s): %s", ds.Type, ds.DatastoreId, ds.Uri)) } + if len(config.Get().Uploads.StoragePaths) > 0 { + logrus.Warn("You are using `storagePaths` in your configuration - in a future update, this will be removed. Please use datastores instead (see sample config).") + } + // TODO: https://github.com/minio/minio-go support logrus.Info("Starting media repository...") diff --git a/src/github.com/turt2live/matrix-media-repo/common/config/config.go b/src/github.com/turt2live/matrix-media-repo/common/config/config.go index 446b6b3e6c427e18bb68411b1eaea1fa69718159..c47832885f35decba908ea86b5fbea309c17dc22 100644 --- a/src/github.com/turt2live/matrix-media-repo/common/config/config.go +++ b/src/github.com/turt2live/matrix-media-repo/common/config/config.go @@ -33,7 +33,6 @@ type DatabaseConfig struct { type UploadsConfig struct { StoragePaths []string `yaml:"storagePaths,flow"` - DataStores []DatastoreConfig `yaml:"datastores"` MaxSizeBytes int64 `yaml:"maxBytes"` MinSizeBytes int64 `yaml:"minBytes"` AllowedTypes []string `yaml:"allowedTypes,flow"` @@ -42,10 +41,10 @@ type UploadsConfig struct { } type DatastoreConfig struct { - Type string `yaml:"type"` - Enabled bool `yaml:"enabled"` - Priority int `yaml:"priority"` - Options map[string]string `yaml:"opts,flow"` + Type string `yaml:"type"` + Enabled bool `yaml:"enabled"` + ForUploads bool `yaml:"forUploads"` + Options map[string]string `yaml:"opts,flow"` } type DownloadsConfig struct { @@ -128,6 +127,7 @@ type MediaRepoConfig struct { Homeservers []*HomeserverConfig `yaml:"homeservers,flow"` Admins []string `yaml:"admins,flow"` Database *DatabaseConfig `yaml:"database"` + DataStores []DatastoreConfig `yaml:"datastores"` Uploads *UploadsConfig `yaml:"uploads"` Downloads *DownloadsConfig `yaml:"downloads"` Thumbnails *ThumbnailsConfig `yaml:"thumbnails"` @@ -212,12 +212,12 @@ func NewDefaultConfig() *MediaRepoConfig { }, Homeservers: []*HomeserverConfig{}, Admins: []string{}, + DataStores: []DatastoreConfig{}, Uploads: &UploadsConfig{ MaxSizeBytes: 104857600, // 100mb MinSizeBytes: 100, ReportedMaxSizeBytes: 0, StoragePaths: []string{}, - DataStores: []DatastoreConfig{}, AllowedTypes: []string{"*/*"}, }, Downloads: &DownloadsConfig{ diff --git a/src/github.com/turt2live/matrix-media-repo/controllers/thumbnail_controller/thumbnail_resource_handler.go b/src/github.com/turt2live/matrix-media-repo/controllers/thumbnail_controller/thumbnail_resource_handler.go index 6bc75b35c840169851bc8907e60ef4d292595459..68d9571fcf85faa3d3fa5d6f5e209e4c07baca90 100644 --- a/src/github.com/turt2live/matrix-media-repo/controllers/thumbnail_controller/thumbnail_resource_handler.go +++ b/src/github.com/turt2live/matrix-media-repo/controllers/thumbnail_controller/thumbnail_resource_handler.go @@ -22,6 +22,7 @@ import ( "github.com/turt2live/matrix-media-repo/common/config" "github.com/turt2live/matrix-media-repo/metrics" "github.com/turt2live/matrix-media-repo/storage" + "github.com/turt2live/matrix-media-repo/storage/datastore" "github.com/turt2live/matrix-media-repo/types" "github.com/turt2live/matrix-media-repo/util" "github.com/turt2live/matrix-media-repo/util/resource_handler" @@ -288,13 +289,17 @@ func GenerateThumbnail(media *types.Media, width int, height int, method string, } // Reset the buffer pointer and store the file - datastore, relPath, err := storage.PersistFile(imgData, ctx, log) + ds, err := datastore.PickDatastore(ctx, log) + if err != nil { + return nil, err + } + relPath, err := ds.UploadFile(imgData, ctx, log) if err != nil { log.Error("Unexpected error saving thumbnail: " + err.Error()) return nil, err } - location := datastore.ResolveFilePath(relPath) + location := ds.ResolveFilePath(relPath) fileSize, err := util.FileSize(location) if err != nil { @@ -309,7 +314,7 @@ func GenerateThumbnail(media *types.Media, width int, height int, method string, } thumb.DiskLocation = relPath - thumb.DatastoreId = datastore.DatastoreId + thumb.DatastoreId = ds.DatastoreId thumb.ContentType = contentType thumb.SizeBytes = fileSize thumb.Sha256Hash = hash diff --git a/src/github.com/turt2live/matrix-media-repo/controllers/upload_controller/upload_controller.go b/src/github.com/turt2live/matrix-media-repo/controllers/upload_controller/upload_controller.go index 4ec7189282a2414ca7cc8b7a478b2a2eecaeb502..8dd49955a24808e3ab7a307487690ae41151d2cc 100644 --- a/src/github.com/turt2live/matrix-media-repo/controllers/upload_controller/upload_controller.go +++ b/src/github.com/turt2live/matrix-media-repo/controllers/upload_controller/upload_controller.go @@ -12,6 +12,7 @@ import ( "github.com/turt2live/matrix-media-repo/common" "github.com/turt2live/matrix-media-repo/common/config" "github.com/turt2live/matrix-media-repo/storage" + "github.com/turt2live/matrix-media-repo/storage/datastore" "github.com/turt2live/matrix-media-repo/types" "github.com/turt2live/matrix-media-repo/util" ) @@ -127,12 +128,14 @@ func IsAllowed(contentType string, reportedContentType string, userId string, lo } func StoreDirect(contents io.Reader, contentType string, filename string, userId string, origin string, mediaId string, ctx context.Context, log *logrus.Entry) (*types.Media, error) { - datastore, location, err := storage.PersistFile(contents, ctx, log) + ds, err := datastore.PickDatastore(ctx, log) if err != nil { return nil, err } + location, err := ds.UploadFile(contents, ctx, log) - fileLocation := datastore.ResolveFilePath(location) + // TODO: Support other datastore types by forking all these checks off to the datastore + fileLocation := ds.ResolveFilePath(location) fileMime, err := util.GetMimeType(fileLocation) if err != nil { @@ -234,7 +237,7 @@ func StoreDirect(contents io.Reader, contentType string, filename string, userId UserId: userId, Sha256Hash: hash, SizeBytes: fileSize, - DatastoreId: datastore.DatastoreId, + DatastoreId: ds.DatastoreId, Location: location, CreationTs: util.NowMillis(), } diff --git a/src/github.com/turt2live/matrix-media-repo/storage/datastore/datastore.go b/src/github.com/turt2live/matrix-media-repo/storage/datastore/datastore.go index f141945857ad6683ca6401460d1d8aad10cbd4ab..a2c486bf809934eb01634ca18373f8ba3659d69c 100644 --- a/src/github.com/turt2live/matrix-media-repo/storage/datastore/datastore.go +++ b/src/github.com/turt2live/matrix-media-repo/storage/datastore/datastore.go @@ -1,4 +1,136 @@ package datastore +import ( + "context" + "fmt" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/turt2live/matrix-media-repo/common/config" + "github.com/turt2live/matrix-media-repo/storage" + "github.com/turt2live/matrix-media-repo/types" +) + // TODO: Upload to DS // TODO: Download (get stream) from DS + +func GetUriForDatastore(dsConf config.DatastoreConfig) string { + if dsConf.Type == "file" { + path, pathFound := dsConf.Options["path"] + if !pathFound { + logrus.Fatal("Missing 'path' on file datastore") + } + return path + } else if dsConf.Type == "s3" { + endpoint, epFound := dsConf.Options["endpoint"] + bucket, bucketFound := dsConf.Options["bucketName"] + if !epFound || !bucketFound { + logrus.Fatal("Missing 'endpoint' or 'bucketName' on s3 datastore") + } + return fmt.Sprintf("s3://%s/%s", endpoint, bucket) + } else { + logrus.Fatal("Unknown datastore type: ", dsConf.Type) + } + + return "" +} + +func GetDatastoreConfig(ds *types.Datastore) (config.DatastoreConfig, error) { + for _, dsConf := range config.Get().DataStores { + if dsConf.Type == ds.Type && GetUriForDatastore(dsConf) == ds.Uri { + return dsConf, nil + } + } + + return config.DatastoreConfig{}, errors.New("datastore not found") +} + +func PickDatastore(ctx context.Context, log *logrus.Entry) (*DatastoreRef, error) { + // Legacy options first + storagePaths := config.Get().Uploads.StoragePaths + if len(storagePaths) > 0 { + log.Warn("Using legacy options to find a datastore") + + if len(storagePaths) == 1 { + ds, err := storage.GetOrCreateDatastoreOfType(ctx, log, "file", storagePaths[0]) + if err != nil { + return nil, err + } + return newDatastoreRef(ds), nil + } + + var basePath string + var pathSize int64 + for i := 0; i < len(storagePaths); i++ { + currPath := storagePaths[i] + ds, err := storage.GetOrCreateDatastoreOfType(ctx, log, "file", currPath) + if err != nil { + continue + } + + size, err := estimatedDatastoreSize(ds, ctx, log) + if err != nil { + continue + } + + if basePath == "" || size < pathSize { + basePath = currPath + pathSize = size + } + } + + if basePath != "" { + ds, err := storage.GetOrCreateDatastoreOfType(ctx, log, "file", basePath) + if err != nil { + return nil, err + } + return newDatastoreRef(ds), nil + } + } + + // If we haven't found a legacy option, pick a datastore + log.Info("Finding a suitable datastore to pick for uploads") + confDatastores := config.Get().DataStores + mediaStore := storage.GetDatabase().GetMediaStore(ctx, log) + + var targetDs *types.Datastore + var dsSize int64 + for _, dsConf := range confDatastores { + if !dsConf.Enabled { + continue + } + + ds, err := mediaStore.GetDatastoreByUri(GetUriForDatastore(dsConf)) + if err != nil { + continue + } + + size, err := estimatedDatastoreSize(ds, ctx, log) + logrus.Info(ds.Uri) + logrus.Info(size) + if err != nil { + continue + } + + if targetDs == nil || size < dsSize { + logrus.Info("Using ", ds.Uri) + targetDs = ds + dsSize = size + } + } + + if targetDs != nil { + return newDatastoreRef(targetDs), nil + } + + return nil, errors.New("failed to pick a datastore: none available") +} + +func estimatedDatastoreSize(ds *types.Datastore, ctx context.Context, log *logrus.Entry) (int64, error) { + if ds.Type == "file" { + return storage.GetDatabase().GetMetadataStore(ctx, log).GetSizeOfFolderBytes(ds.Uri) + } else { + // We can't estimate the size of other datastores, so don't + return 0, nil + } +} diff --git a/src/github.com/turt2live/matrix-media-repo/storage/datastore/datastore_ref.go b/src/github.com/turt2live/matrix-media-repo/storage/datastore/datastore_ref.go new file mode 100644 index 0000000000000000000000000000000000000000..95d7a2a0b8844a758fc79539d93fc8e31eaa3a2e --- /dev/null +++ b/src/github.com/turt2live/matrix-media-repo/storage/datastore/datastore_ref.go @@ -0,0 +1,43 @@ +package datastore + +import ( + "context" + "errors" + "io" + + "github.com/sirupsen/logrus" + "github.com/turt2live/matrix-media-repo/storage" + "github.com/turt2live/matrix-media-repo/types" +) + +type DatastoreRef struct { + // TODO: Don't blindly copy properties from types.Datastore + DatastoreId string + Type string + Uri string + + datastore *types.Datastore +} + +func newDatastoreRef(ds *types.Datastore) *DatastoreRef { + return &DatastoreRef{ + DatastoreId: ds.DatastoreId, + Type: ds.Type, + Uri: ds.Uri, + datastore: ds, + } +} + +func (d *DatastoreRef) ResolveFilePath(location string) (string) { + return d.datastore.ResolveFilePath(location) +} + +func (d *DatastoreRef) UploadFile(file io.Reader, ctx context.Context, log *logrus.Entry) (string, error) { + if d.Type == "file" { + return storage.PersistFile(d.Uri, file, ctx, log) + } else if d.Type == "s3" { + panic("failed to upload file") + } else { + return "", errors.New("unknown datastore type") + } +} diff --git a/src/github.com/turt2live/matrix-media-repo/storage/file_store.go b/src/github.com/turt2live/matrix-media-repo/storage/file_store.go index 54786b5d4cec6e9ee418944c8334424eaa55834f..27a80285c4752b0874026e2033372ba972d71258 100644 --- a/src/github.com/turt2live/matrix-media-repo/storage/file_store.go +++ b/src/github.com/turt2live/matrix-media-repo/storage/file_store.go @@ -10,35 +10,10 @@ import ( "path" "github.com/sirupsen/logrus" - "github.com/turt2live/matrix-media-repo/common/config" - "github.com/turt2live/matrix-media-repo/types" "github.com/turt2live/matrix-media-repo/util" ) -func PersistFile(file io.Reader, ctx context.Context, log *logrus.Entry) (*types.Datastore, string, error) { - var basePath string - if len(config.Get().Uploads.StoragePaths) != 1 { - var pathSize int64 - for i := 0; i < len(config.Get().Uploads.StoragePaths); i++ { - currPath := config.Get().Uploads.StoragePaths[i] - size, err := GetDatabase().GetMetadataStore(ctx, log).GetSizeOfFolderBytes(currPath) - if err != nil { - continue - } - if basePath == "" || size < pathSize { - basePath = currPath - pathSize = size - } - } - } else { - basePath = config.Get().Uploads.StoragePaths[0] - } - - if basePath == "" { - return nil, "", errors.New("could not find a suitable base path") - } - log.Info("Using the base path: " + basePath) - +func PersistFile(basePath string, file io.Reader, ctx context.Context, log *logrus.Entry) (string, error) { exists := true var primaryContainer string var secondaryContainer string @@ -49,7 +24,7 @@ func PersistFile(file io.Reader, ctx context.Context, log *logrus.Entry) (*types for exists { fileId, err := util.GenerateRandomString(64) if err != nil { - return nil, "", err + return "", err } primaryContainer = fileId[0:2] @@ -69,35 +44,28 @@ func PersistFile(file io.Reader, ctx context.Context, log *logrus.Entry) (*types // Infinite loop protection if attempts > 5 { - return nil, "", errors.New("failed to find a suitable directory") + return "", errors.New("failed to find a suitable directory") } } err := os.MkdirAll(targetDir, 0755) if err != nil { - return nil, "", err + return "", err } f, err := os.OpenFile(targetFile, os.O_WRONLY|os.O_CREATE, 0644) if err != nil { - return nil, "", err + return "", err } defer f.Close() _, err = io.Copy(f, file) if err != nil { - return nil, "", err + return "", err } locationPath := path.Join(primaryContainer, secondaryContainer, fileName) - datastorePath := basePath - - datastore, err := GetOrCreateDatastore(ctx, log, datastorePath) - if err != nil { - return nil, "", err - } - - return datastore, locationPath, nil + return locationPath, nil } func GetFileHash(filePath string) (string, error) { diff --git a/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go b/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go index f3d3cbefc6099bf67c362436c4f8a0f104cd4c06..f13014ec0a33d9c3eb35bcea41ad5e8f27c9533c 100644 --- a/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go +++ b/src/github.com/turt2live/matrix-media-repo/storage/stores/metadata_store.go @@ -61,6 +61,7 @@ func (s *MetadataStore) UpsertLastAccess(sha256Hash string, timestamp int64) (er return err } +// TODO: Handle datastores instead func (s *MetadataStore) GetSizeOfFolderBytes(folderPath string) (int64, error) { r := &folderSize{} err := s.statements.selectSizeOfFolder.QueryRowContext(s.ctx, folderPath).Scan(&r.Size)