From 47ece80ba8b3f725fa8a2cc5f673fb249dbe5a2a Mon Sep 17 00:00:00 2001 From: Travis Ralston <travpc@gmail.com> Date: Sun, 7 Jan 2018 04:36:14 -0700 Subject: [PATCH] Synapse import This is done over http to ensure that synapse is left with a backup of the media repository. Adds #4 --- README.md | 28 +++++ .../cmd/import_synapse/main.go | 110 ++++++++++++++++-- .../turt2live/matrix-media-repo/synapse/db.go | 74 ++++++++++++ .../synapse/homeserver_yaml.go | 50 -------- .../matrix-media-repo/synapse/local_media.go | 10 ++ vendor/manifest | 6 + 6 files changed, 218 insertions(+), 60 deletions(-) create mode 100644 src/github.com/turt2live/matrix-media-repo/synapse/db.go delete mode 100644 src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go create mode 100644 src/github.com/turt2live/matrix-media-repo/synapse/local_media.go diff --git a/README.md b/README.md index 8f7baf86..46b171ec 100644 --- a/README.md +++ b/README.md @@ -107,3 +107,31 @@ listeners: - names: [federation] compress: false ``` + +# Importing media from synapse + +Media is imported by connecting to your synapse database and downloading all the content from the homeserver. This is so you have a backup of the media repository still with synapse. **Do not point traffic at the media repo until after the import is complete.** + +1. Build the media repo +2. Configure the `media-repo.yaml` +3. Run `bin/import_synapse`. The usage is below. + ``` + Usage of ./bin/import_synapse: + -baseUrl string + The base URL to access your homeserver with (default "http://localhost:8008") + -dbHost string + The IP or hostname of the postgresql server with the synapse database (default "localhost") + -dbName string + The name of the synapse database (default "synapse") + -dbPassword string + The password to authorize the postgres user. 
Can be omitted to be prompted when run + -dbPort int + The port to access postgres on (default 5432) + -dbUsername string + The username to access postgres with (default "synapse") + -serverName string + The name of your homeserver (eg: matrix.org) (default "localhost") + ``` + Assuming the media repository, postgres database, and synapse are all on the same host, the command to run would look something like: `bin/import_synapse -serverName myserver.com -dbUsername my_database_user -dbName synapse` +4. Wait for the import to complete. The script will automatically deduplicate media. +5. Point traffic to the media repository diff --git a/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go b/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go index 1103202f..53ab80cd 100644 --- a/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go +++ b/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go @@ -1,37 +1,127 @@ package main import ( + "context" + "errors" "flag" "fmt" + "io" + "net/http" + "strconv" + "github.com/howeyc/gopass" + "github.com/sirupsen/logrus" "github.com/turt2live/matrix-media-repo/config" + "github.com/turt2live/matrix-media-repo/logging" + "github.com/turt2live/matrix-media-repo/rcontext" + "github.com/turt2live/matrix-media-repo/services" "github.com/turt2live/matrix-media-repo/storage" "github.com/turt2live/matrix-media-repo/synapse" ) func main() { + postgresHost := flag.String("dbHost", "localhost", "The IP or hostname of the postgresql server with the synapse database") + postgresPort := flag.Int("dbPort", 5432, "The port to access postgres on") + postgresUsername := flag.String("dbUsername", "synapse", "The username to access postgres with") + postgresPassword := flag.String("dbPassword", "", "The password to authorize the postgres user. 
Can be omitted to be prompted when run") + postgresDatabase := flag.String("dbName", "synapse", "The name of the synapse database") + baseUrl := flag.String("baseUrl", "http://localhost:8008", "The base URL to access your homeserver with") + serverName := flag.String("serverName", "localhost", "The name of your homeserver (eg: matrix.org)") + flag.Parse() + + var realPsqlPassword string + if *postgresPassword == "" { + fmt.Printf("Postgres password: ") + pass, err := gopass.GetPasswd() + if err != nil { + panic(err) + } + realPsqlPassword = string(pass[:]) + } else { + realPsqlPassword = *postgresPassword + } + c, err := config.ReadConfig() if err != nil { panic(err) } + err = logging.Setup(c.General.LogDirectory) + if err != nil { + panic(err) + } + + logrus.Info("Setting up for importing...") + db, err := storage.OpenDatabase(c.Database.Postgres) if err != nil { panic(err) } - homeserverYamlPath := flag.String("homeserver", "homeserver.yaml", "Path to your homeserver.yaml") - moveFiles := flag.Bool("move", false, "If set, files will be moved instead of copied") - importRemote := flag.Bool("remote", false, "If set, remote media will also be imported") - flag.Parse() + connectionString := "postgres://" + *postgresUsername + ":" + realPsqlPassword + "@" + *postgresHost + ":" + strconv.Itoa(*postgresPort) + "/" + *postgresDatabase + "?sslmode=disable" + csApiUrl := *baseUrl + if csApiUrl[len(csApiUrl)-1:] == "/" { + csApiUrl = csApiUrl[:len(csApiUrl)-1] + } + + logrus.Info("Connecting to synapse database...") + synDb, err := synapse.OpenDatabase(connectionString) + if err != nil { + panic(err) + } - hsConfig, err := synapse.ReadConfig(*homeserverYamlPath) + logrus.Info("Fetching all local media records from synapse...") + records, err := synDb.GetAllMedia() if err != nil { panic(err) } - fmt.Println(*moveFiles) - fmt.Println(*importRemote) - fmt.Println(*db) - fmt.Println(hsConfig.GetConnectionString()) -} \ No newline at end of file + 
// downloadMedia requests a single media item from the homeserver's
// /_matrix/media/r0/download endpoint and returns the response body as a
// stream.
//
// The request path is appended directly to baseUrl, so baseUrl is expected to
// carry no trailing slash. serverName and mediaId are inserted into the path
// as given.
//
// On success the caller owns the returned body and must close it. If the
// request fails, or the homeserver answers with anything other than 200 OK,
// the returned body is nil and the error describes the failure.
func downloadMedia(baseUrl string, serverName string, mediaId string) (io.ReadCloser, error) {
	downloadUrl := baseUrl + "/_matrix/media/r0/download/" + serverName + "/" + mediaId
	resp, err := http.Get(downloadUrl)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != http.StatusOK {
		// The body is not handed to the caller on this path, so it must be
		// closed here; otherwise every failed download leaks a connection.
		resp.Body.Close()
		return nil, errors.New("received status code " + strconv.Itoa(resp.StatusCode))
	}

	return resp.Body, nil
}
(*SynapseDatabase, error) { + var d SynapseDatabase + var err error + + if d.db, err = sql.Open("postgres", connectionString); err != nil { + return nil, err + } + + if d.statements.selectLocalMedia, err = d.db.Prepare(selectLocalMedia); err != nil { + return nil, err + } + + return &d, nil +} + +func (d *SynapseDatabase) GetAllMedia() ([]LocalMedia, error) { + rows, err := d.statements.selectLocalMedia.Query() + if err != nil { + if err == sql.ErrNoRows { + return []LocalMedia{}, nil // no records + } + return nil, err + } + + var results []LocalMedia + for rows.Next() { + var mediaId sql.NullString + var contentType sql.NullString + var sizeBytes sql.NullInt64 + var createdTs sql.NullInt64 + var uploadName sql.NullString + var userId sql.NullString + err = rows.Scan( + &mediaId, + &contentType, + &sizeBytes, + &createdTs, + &uploadName, + &userId, + ) + if err != nil { + return nil, err + } + results = append(results, LocalMedia{ + MediaId: mediaId.String, + ContentType: contentType.String, + SizeBytes: sizeBytes.Int64, + CreatedTs: createdTs.Int64, + UploadName: uploadName.String, + UserId: userId.String, + }) + } + + return results, nil +} diff --git a/src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go b/src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go deleted file mode 100644 index 8dfc9fb9..00000000 --- a/src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go +++ /dev/null @@ -1,50 +0,0 @@ -package synapse - -import ( - "errors" - "io/ioutil" - "os" - - "gopkg.in/yaml.v2" -) - -type HomeserverYaml struct { - Database struct { - Name string `yaml:"name"` - Arguments struct { - Username string `yaml:"user"` - Password string `yaml:"password"` - Database string `yaml:"database"` - Hostname string `yaml:"host"` - } `yaml:"args"` - } `yaml:"database"` -} - -func ReadConfig(yamlPath string) (HomeserverYaml, error) { - c := &HomeserverYaml{} - - f, err := os.Open(yamlPath) - if err != nil { - return *c, err - } 
// LocalMedia holds the values read from one row of synapse's
// local_media_repository table. A column that is NULL in the database is
// stored here as the zero value of the field.
type LocalMedia struct {
	MediaId     string // media_id column
	ContentType string // media_type column
	SizeBytes   int64  // media_length column
	CreatedTs   int64  // created_ts column; NOTE(review): unit not visible here, presumably a Unix timestamp, confirm against synapse's schema
	UploadName  string // upload_name column
	UserId      string // user_id column
}