From a73876dce58db1764d33f9cb66d2e60b68acc58d Mon Sep 17 00:00:00 2001 From: Travis Ralston <travpc@gmail.com> Date: Sun, 21 May 2023 16:33:25 -0600 Subject: [PATCH] Start cutover to new datastores structure --- CHANGELOG.md | 75 +++++++++++++++++++ cmd/media_repo/reloads.go | 4 +- common/assets/process.go | 2 +- common/config/access.go | 13 +--- common/config/models_domain.go | 2 +- common/logging/logger.go | 21 ++++++ common/runtime/init.go | 67 +++++------------ config.sample.yaml | 6 +- database/db.go | 27 +++++-- database/table_datastores.go | 132 --------------------------------- database/table_media.go | 69 +++++++++++++++++ storage/datastore/datastore.go | 8 -- 12 files changed, 219 insertions(+), 207 deletions(-) delete mode 100644 database/table_datastores.go create mode 100644 database/table_media.go diff --git a/CHANGELOG.md b/CHANGELOG.md index e3f61bf0..dc9a2973 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,79 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] +### Mandatory Configuration Change + +Datastores are no longer managed by matrix-media-repo internally, meaning you MUST specify a datastore ID on each of your +configured datastores. If you're setting up matrix-media-repo for the first time then you can use whatever you want for +a datastore ID (though it's recommended to stick to alphanumeric strings). If you're *upgrading* to this version however, +you will need to pull the datastore IDs out of the matrix-media-repo and add them to your configuration. + +**For safety, the datastores table is *not* deleted from the database in this upgrade. A future version may drop the table, +however.** + +#### Getting existing datastore IDs + +**Before upgrading**, you can get your datastore IDs fairly easily. The best way might be to look at the startup log of +your media repo: + +```text +INFO[2023-05-21 20:58:45.116 Z] Datastores: +INFO[2023-05-21 20:58:45.116 Z] file (e9ce13bbb062383ce1bcee76414058668877f2d51635810652335374336): /mnt/mmr-store/location4 +INFO[2023-05-21 20:58:45.117 Z] s3 (7669e2fb8ccaa0801e4255a417ad20884f76b8611659655069202644992): s3://redacted.r2.cloudflarestorage.com/redacted +``` + +This way, you're able to correlate locations to IDs. For example, the `file` datastore configured to put media at +`/mnt/mmr-store/location4` has ID `e9ce13bbb062383ce1bcee76414058668877f2d51635810652335374336`. Add this as +`id: "e9ce13bbb062383ce1bcee76414058668877f2d51635810652335374336"` in your media repo config file. + +Alternatively, you can use the admin API to get your datastores: + +```text +curl -s -X GET -H "Authorization: Bearer YOUR_ACCESS_TOKEN" https://example.org/_matrix/media/unstable/admin/datastores +{ + "e9ce13bbb062383ce1bcee76414058668877f2d51635810652335374336": { + "type": "file", + "uri": "/mnt/mmr-store/location4" + }, + "7669e2fb8ccaa0801e4255a417ad20884f76b8611659655069202644992": { + "type": "s3", + "uri": "s3://redacted.r2.cloudflarestorage.com/redacted" + } +} +``` + +The returned object is keyed by ID over the API. + +In either case, take the ID and add it to the associated datastore in your config, similar to the following: + +```yaml +# Your specific configuration may be different +datastores: + - type: file + id: "e9ce13bbb062383ce1bcee76414058668877f2d51635810652335374336" ## ADD THIS + enabled: true + forKinds: ["archives"] + opts: + path: "/mnt/mmr-store/location4" + - type: s3 + id: "7669e2fb8ccaa0801e4255a417ad20884f76b8611659655069202644992" ## ADD THIS + enabled: true + forKinds: ["all"] + opts: + ssl: true + tempPath: "/mnt/mmr-store/s3-staging" + endpoint: redacted.r2.cloudflarestorage.com + accessKeyId: "redacted" + accessSecret: "redacted" + bucketName: "redacted" +``` + +**Note**: If matrix-media-repo detects that a datastore ID is used but not referenced in the config then it will refuse +to start. + +This new configuration style additionally allows for out-of-band datastore transfers. If you move all your data to a new +path/server, for example, then you can simply update the path in the config for that datastore. + ### Added * Added a `federation.ignoredHosts` config option to block media from individual homeservers. @@ -17,6 +90,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed +* **Mandatory configuration change**: You must add datastore IDs to your datastore configuration, as matrix-media-repo will no longer manage datastores for you. +* Datastores no longer use the `enabled` flag set on them. Use `forKinds: []` instead. * Some admin endpoints for purging media, quarantining media, and background task information now require additional path components. See [docs/admin.md](./docs/admin.md) for more information. * Updated to Go 1.19 diff --git a/cmd/media_repo/reloads.go b/cmd/media_repo/reloads.go index 79cb16f4..f7881eed 100644 --- a/cmd/media_repo/reloads.go +++ b/cmd/media_repo/reloads.go @@ -5,10 +5,10 @@ import ( "github.com/turt2live/matrix-media-repo/api/_auth_cache" "github.com/turt2live/matrix-media-repo/common/globals" "github.com/turt2live/matrix-media-repo/common/runtime" + "github.com/turt2live/matrix-media-repo/database" "github.com/turt2live/matrix-media-repo/internal_cache" "github.com/turt2live/matrix-media-repo/metrics" "github.com/turt2live/matrix-media-repo/plugins" - "github.com/turt2live/matrix-media-repo/storage" "github.com/turt2live/matrix-media-repo/tasks" ) @@ -69,7 +69,7 @@ func reloadDatabaseOnChan(reloadChan chan bool) { for { shouldReload := <-reloadChan if shouldReload { - storage.ReloadDatabase() + database.Reload() runtime.LoadDatabase() globals.DatastoresReloadChan <- true } else { diff --git a/common/assets/process.go b/common/assets/process.go index b154e59d..947d869e 100644 --- a/common/assets/process.go +++ b/common/assets/process.go @@ -69,7 +69,7 @@ func SetupAssets(givenAssetsPath string) { func Cleanup() { if tempMigrations != "" { - logrus.Info("Cleaning up temporary assets directory: ", tempMigrations) + logrus.Info("Cleaning up temporary migrations directory: ", tempMigrations) os.Remove(tempMigrations) } if tempTemplates != "" { diff --git a/common/config/access.go b/common/config/access.go index 047a766d..91cffceb 100644 --- a/common/config/access.go +++ b/common/config/access.go @@ -248,15 +248,10 @@ func UniqueDatastores() []DatastoreConfig { for _, d := range AllDomains() { for _, dsc := range d.DataStores { found := false - for _, edsc := range confs { - if edsc.Type == dsc.Type { - if dsc.Type == "file" && edsc.Options["path"] == dsc.Options["path"] { - found = true - break - } else if dsc.Type == "s3" && edsc.Options["endpoint"] == dsc.Options["endpoint"] && edsc.Options["bucketName"] == dsc.Options["bucketName"] { - found = true - break - } + for _, existingDsc := range confs { + if existingDsc.Id == dsc.Id { + found = true + break } } if found { diff --git a/common/config/models_domain.go b/common/config/models_domain.go index 0c3fc55b..40adc886 100644 --- a/common/config/models_domain.go +++ b/common/config/models_domain.go @@ -24,8 +24,8 @@ type UploadsConfig struct { } type DatastoreConfig struct { + Id string `yaml:"id"` Type string `yaml:"type"` - Enabled bool `yaml:"enabled"` MediaKinds []string `yaml:"forKinds,flow"` Options map[string]string `yaml:"opts,flow"` } diff --git a/common/logging/logger.go b/common/logging/logger.go index e3531cd4..d9d5c098 100644 --- a/common/logging/logger.go +++ b/common/logging/logger.go @@ -5,6 +5,7 @@ import ( "path" "time" + "github.com/DavidHuie/gomigrate" "github.com/lestrrat/go-file-rotatelogs" "github.com/rifflock/lfshook" "github.com/sirupsen/logrus" @@ -76,3 +77,23 @@ func Setup(dir string, colors bool, json bool, level string) error { return nil } + +type GoMigrateLogger struct { + gomigrate.Logger +} + +func (*GoMigrateLogger) Print(v ...interface{}) { + logrus.Debug(v...) +} + +func (*GoMigrateLogger) Printf(format string, v ...interface{}) { + logrus.Debugf(format, v...) +} + +func (*GoMigrateLogger) Println(v ...interface{}) { + logrus.Debugln(v...) +} + +func (*GoMigrateLogger) Fatalf(format string, v ...interface{}) { + logrus.Fatalf(format, v...) +} diff --git a/common/runtime/init.go b/common/runtime/init.go index 4aaf5065..e79b745d 100644 --- a/common/runtime/init.go +++ b/common/runtime/init.go @@ -1,9 +1,8 @@ package runtime import ( - "fmt" - "github.com/getsentry/sentry-go" + "github.com/turt2live/matrix-media-repo/database" "github.com/turt2live/matrix-media-repo/util/ids" "github.com/sirupsen/logrus" @@ -11,9 +10,6 @@ import ( "github.com/turt2live/matrix-media-repo/common/rcontext" "github.com/turt2live/matrix-media-repo/common/version" "github.com/turt2live/matrix-media-repo/plugins" - "github.com/turt2live/matrix-media-repo/storage" - "github.com/turt2live/matrix-media-repo/storage/datastore" - "github.com/turt2live/matrix-media-repo/storage/datastore/ds_s3" ) func RunStartupSequence() { @@ -28,59 +24,36 @@ func RunStartupSequence() { func LoadDatabase() { logrus.Info("Preparing database...") - storage.GetDatabase() + database.GetInstance() } func LoadDatastores() { - mediaStore := storage.GetDatabase().GetMediaStore(rcontext.Initial()) - - logrus.Info("Initializing datastores...") - for _, ds := range config.UniqueDatastores() { - if !ds.Enabled { - continue - } - - uri := datastore.GetUriForDatastore(ds) - - _, err := storage.GetOrCreateDatastoreOfType(rcontext.Initial(), ds.Type, uri) - if err != nil { - sentry.CaptureException(err) - logrus.Fatal(err) - } - } + mediaDb := database.GetInstance().Media.Prepare(rcontext.Initial()) - // Print all the known datastores at startup. Doubles as a way to initialize the database. - datastores, err := mediaStore.GetAllDatastores() + logrus.Info("Comparing datastores against config...") + storeIds, err := mediaDb.GetDistinctDatastoreIds() if err != nil { sentry.CaptureException(err) logrus.Fatal(err) } - logrus.Info("Datastores:") - for _, ds := range datastores { - logrus.Info(fmt.Sprintf("\t%s (%s): %s", ds.Type, ds.DatastoreId, ds.Uri)) - if ds.Type == "s3" { - conf, err := datastore.GetDatastoreConfig(ds) - if err != nil { - continue - } - - s3, err := ds_s3.GetOrCreateS3Datastore(ds.DatastoreId, conf) - if err != nil { - continue - } - - err = s3.EnsureBucketExists() - if err != nil { - logrus.Warn("\t\tBucket does not exist!") - } - - err = s3.EnsureTempPathExists() - if err != nil { - logrus.Warn("\t\tTemporary path does not exist!") - } + dsMap := make(map[string]bool) + for _, id := range storeIds { + dsMap[id] = false + } + for _, ds := range config.UniqueDatastores() { + dsMap[ds.Id] = true + } + fatal := false + for id, found := range dsMap { + if !found { + logrus.Errorf("No configured datastore for ID %s found - please check your configuration and restart.", id) + fatal = true } } + if fatal { + logrus.Fatal("One or more datastores are not configured") + } } func CheckIdGenerator() { diff --git a/config.sample.yaml b/config.sample.yaml index a3900d94..13118b09 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -154,12 +154,14 @@ sharedSecretAuth: # for a datastore to use, will always use the smallest datastore first. datastores: - type: file - enabled: false # Enable this to set up data storage. + id: "UNIQUE_ID_HERE" # ID for this datastore (cannot change). Alphanumeric recommended. # Datastores can be split into many areas when handling uploads. Media is still de-duplicated # across all datastores (local content which duplicates remote content will re-use the remote # content's location). This option is useful if your datastore is becoming very large, or if # you want faster storage for a particular kind of media. # + # To disable this datastore, making it readonly, specify `forKinds: []`. + # # The kinds available are: # thumbnails - Used to store thumbnails of media (local and remote). # remote_media - Original copies of remote media (servers not configured by this repo). @@ -170,7 +172,7 @@ datastores: path: /var/matrix/media - type: s3 - enabled: false # Enable this to set up s3 uploads + id: "ANOTHER_UNIQUE_ID_HERE" # ID for this datastore (cannot change). Alphanumeric recommended. forKinds: ["thumbnails", "remote_media", "local_media", "archives"] opts: # The s3 uploader needs a temporary location to buffer files to reduce memory usage on diff --git a/database/db.go b/database/db.go index 7b1e1795..319ccc8a 100644 --- a/database/db.go +++ b/database/db.go @@ -6,13 +6,16 @@ import ( "sync" "github.com/DavidHuie/gomigrate" + "github.com/getsentry/sentry-go" + _ "github.com/lib/pq" // postgres driver "github.com/sirupsen/logrus" "github.com/turt2live/matrix-media-repo/common/config" + "github.com/turt2live/matrix-media-repo/common/logging" ) type Database struct { - conn *sql.DB - Datastores *dsTableStatements + conn *sql.DB + Media *mediaTableStatements } var instance *Database @@ -33,6 +36,19 @@ func GetInstance() *Database { return instance } +func Reload() { + if instance != nil { + if err := instance.conn.Close(); err != nil { + logrus.Error(err) + sentry.CaptureException(err) + } + } + + instance = nil + singleton = &sync.Once{} + GetInstance() +} + func openDatabase(connectionString string, maxConns int, maxIdleConns int) error { d := &Database{} var err error @@ -45,7 +61,7 @@ func openDatabase(connectionString string, maxConns int, maxIdleConns int) error // Run migrations var migrator *gomigrate.Migrator - if migrator, err = gomigrate.NewMigratorWithLogger(d.conn, gomigrate.Postgres{}, config.Runtime.MigrationsPath, logrus.StandardLogger()); err != nil { + if migrator, err = gomigrate.NewMigratorWithLogger(d.conn, gomigrate.Postgres{}, config.Runtime.MigrationsPath, &logging.GoMigrateLogger{}); err != nil { return errors.New("error setting up migrator: " + err.Error()) } if err = migrator.Migrate(); err != nil { @@ -53,9 +69,10 @@ func openDatabase(connectionString string, maxConns int, maxIdleConns int) error } // Prepare the table accessors - if d.Datastores, err = prepareDatastoreTables(d.conn); err != nil { - return errors.New("failed to create datastores table accessor") + if d.Media, err = prepareMediaTables(d.conn); err != nil { + return errors.New("failed to create media table accessor") } + instance = d return nil } diff --git a/database/table_datastores.go b/database/table_datastores.go deleted file mode 100644 index fd2e72ce..00000000 --- a/database/table_datastores.go +++ /dev/null @@ -1,132 +0,0 @@ -package database - -import ( - "database/sql" - "errors" - "sync" - - "github.com/turt2live/matrix-media-repo/common/rcontext" -) - -type DbDatastore struct { - DatastoreId string - Type string - Uri string -} - -const selectAllDatastores = "SELECT datastore_id, ds_type, uri FROM datastores;" -const selectDatastore = "SELECT datastore_id, ds_type, uri FROM datastores WHERE datastore_id = $1;" -const selectDatastoreByUri = "SELECT datastore_id, ds_type, uri FROM datastores WHERE uri = $1;" -const insertDatastore = "INSERT INTO datastores (datastore_id, ds_type, uri) VALUES ($1, $2, $3);" - -var dsCacheByPath = sync.Map{} // [string] => Datastore -var dsCacheById = sync.Map{} // [string] => Datastore - -type dsTableStatements struct { - selectAllDatastores *sql.Stmt - selectDatastore *sql.Stmt - selectDatastoreByUri *sql.Stmt - insertDatastore *sql.Stmt -} - -type dsTableWithContext struct { - statements *dsTableStatements - ctx rcontext.RequestContext -} - -func prepareDatastoreTables(db *sql.DB) (*dsTableStatements, error) { - var err error - var stmts = &dsTableStatements{} - - if stmts.selectAllDatastores, err = db.Prepare(selectAllDatastores); err != nil { - return nil, errors.New("error preparing selectAllDatastores: " + err.Error()) - } - if stmts.selectDatastore, err = db.Prepare(selectDatastore); err != nil { - return nil, errors.New("error preparing selectDatastore: " + err.Error()) - } - if stmts.selectDatastoreByUri, err = db.Prepare(selectDatastoreByUri); err != nil { - return nil, errors.New("error preparing selectDatastoreByUri: " + err.Error()) - } - if stmts.insertDatastore, err = db.Prepare(insertDatastore); err != nil { - return nil, errors.New("error preparing insertDatastore: " + err.Error()) - } - - return stmts, nil -} - -func (s *dsTableStatements) Prepare(ctx rcontext.RequestContext) *dsTableWithContext { - return &dsTableWithContext{ - statements: s, - ctx: ctx, - } -} - -func (s *dsTableWithContext) GetDatastore(id string) (*DbDatastore, error) { - if v, ok := dsCacheById.Load(id); ok { - ds := v.(*DbDatastore) - return &DbDatastore{ - DatastoreId: ds.DatastoreId, - Type: ds.Type, - Uri: ds.Uri, - }, nil - } - - d := &DbDatastore{} - if err := s.statements.selectDatastore.QueryRowContext(s.ctx, id).Scan(&d.DatastoreId, &d.Type, &d.Uri); err != nil { - return nil, err - } - - dsCacheById.Store(d.DatastoreId, d) - dsCacheByPath.Store(d.Uri, d) - - return d, nil -} - -func (s *dsTableWithContext) GetDatastoreByUri(uri string) (*DbDatastore, error) { - if v, ok := dsCacheByPath.Load(uri); ok { - ds := v.(*DbDatastore) - return &DbDatastore{ - DatastoreId: ds.DatastoreId, - Type: ds.Type, - Uri: ds.Uri, - }, nil - } - - d := &DbDatastore{} - if err := s.statements.selectDatastoreByUri.QueryRowContext(s.ctx, uri).Scan(&d.DatastoreId, &d.Type, &d.Uri); err != nil { - return nil, err - } - - dsCacheById.Store(d.DatastoreId, d) - dsCacheByPath.Store(d.Uri, d) - - return d, nil -} - -func (s *dsTableWithContext) GetAllDatastores() ([]*DbDatastore, error) { - rows, err := s.statements.selectAllDatastores.QueryContext(s.ctx) - if err != nil { - return nil, err - } - - var results []*DbDatastore - for rows.Next() { - obj := &DbDatastore{} - if err = rows.Scan(&obj.DatastoreId, &obj.Type, &obj.Uri); err != nil { - return nil, err - } - } - - return results, nil -} - -func (s *dsTableWithContext) InsertDatastore(ds *DbDatastore) error { - if _, err := s.statements.insertDatastore.ExecContext(s.ctx, ds.DatastoreId, ds.Type, ds.Uri); err != nil { - return errors.New("error persiting datastore record: " + err.Error()) - } - - dsCacheById.Store(ds.DatastoreId, ds) - dsCacheByPath.Store(ds.Uri, ds) - - return nil -} diff --git a/database/table_media.go b/database/table_media.go new file mode 100644 index 00000000..e068ee0a --- /dev/null +++ b/database/table_media.go @@ -0,0 +1,69 @@ +package database + +import ( + "database/sql" + "errors" + + "github.com/turt2live/matrix-media-repo/common/rcontext" +) + +type DbMedia struct { + Origin string + MediaId string + UploadName string + ContentType string + UserId string + Sha256Hash string + SizeBytes int64 + CreationTs int64 + Quarantined bool + DatastoreId string + Location string +} + +const selectDistinctMediaDatastoreIds = "SELECT DISTINCT datastore_id FROM media;" + +type mediaTableStatements struct { + selectDistinctMediaDatastoreIds *sql.Stmt +} + +type mediaTableWithContext struct { + statements *mediaTableStatements + ctx rcontext.RequestContext +} + +func prepareMediaTables(db *sql.DB) (*mediaTableStatements, error) { + var err error + var stmts = &mediaTableStatements{} + + if stmts.selectDistinctMediaDatastoreIds, err = db.Prepare(selectDistinctMediaDatastoreIds); err != nil { + return nil, errors.New("error preparing selectDistinctMediaDatastoreIds: " + err.Error()) + } + + return stmts, nil +} + +func (s *mediaTableStatements) Prepare(ctx rcontext.RequestContext) *mediaTableWithContext { + return &mediaTableWithContext{ + statements: s, + ctx: ctx, + } +} + +func (s *mediaTableWithContext) GetDistinctDatastoreIds() ([]string, error) { + rows, err := s.statements.selectDistinctMediaDatastoreIds.QueryContext(s.ctx) + if err != nil { + return nil, err + } + + var results []string + for rows.Next() { + val := "" + if err = rows.Scan(&val); err != nil { + return nil, err + } + results = append(results, val) + } + + return results, nil +} diff --git a/storage/datastore/datastore.go b/storage/datastore/datastore.go index e03c3cf8..dd6faa6e 100644 --- a/storage/datastore/datastore.go +++ b/storage/datastore/datastore.go @@ -18,10 +18,6 @@ import ( func GetAvailableDatastores(ctx rcontext.RequestContext) ([]*types.Datastore, error) { datastores := make([]*types.Datastore, 0) for _, ds := range ctx.Config.DataStores { - if !ds.Enabled { - continue - } - uri := GetUriForDatastore(ds) dsInstance, err := storage.GetOrCreateDatastoreOfType(rcontext.Initial(), ds.Type, uri) @@ -106,10 +102,6 @@ func PickDatastore(forKind string, ctx rcontext.RequestContext) (*DatastoreRef, // size of the datastore). var possibleDatastores = make([]config.DatastoreConfig, 0) for _, dsConf := range confDatastores { - if !dsConf.Enabled { - continue - } - allowed := common.HasKind(dsConf.MediaKinds, forKind) if !allowed { continue -- GitLab