From 47ece80ba8b3f725fa8a2cc5f673fb249dbe5a2a Mon Sep 17 00:00:00 2001
From: Travis Ralston <travpc@gmail.com>
Date: Sun, 7 Jan 2018 04:36:14 -0700
Subject: [PATCH] Synapse import

The import downloads each file from synapse over HTTP, so synapse is left with an untouched copy of its media repository as a backup.

Adds #4
---
 README.md                                     |  28 +++++
 .../cmd/import_synapse/main.go                | 110 ++++++++++++++++--
 .../turt2live/matrix-media-repo/synapse/db.go |  74 ++++++++++++
 .../synapse/homeserver_yaml.go                |  50 --------
 .../matrix-media-repo/synapse/local_media.go  |  10 ++
 vendor/manifest                               |   6 +
 6 files changed, 218 insertions(+), 60 deletions(-)
 create mode 100644 src/github.com/turt2live/matrix-media-repo/synapse/db.go
 delete mode 100644 src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go
 create mode 100644 src/github.com/turt2live/matrix-media-repo/synapse/local_media.go

diff --git a/README.md b/README.md
index 8f7baf86..46b171ec 100644
--- a/README.md
+++ b/README.md
@@ -107,3 +107,31 @@ listeners:
       - names: [federation]
         compress: false
 ```
+
+# Importing media from synapse
+
+Media is imported by connecting to your synapse database to list the local media, then downloading each file from the homeserver over HTTP. Synapse's own media store is left untouched, so it remains available as a backup. **Do not point traffic at the media repo until after the import is complete.**
+
+1. Build the media repo
+2. Configure the `media-repo.yaml`
+3. Run `bin/import_synapse`. Its usage is shown below.
+    ```
+    Usage of ./bin/import_synapse:
+      -baseUrl string
+            The base URL to access your homeserver with (default "http://localhost:8008")
+      -dbHost string
+            The IP or hostname of the postgresql server with the synapse database (default "localhost")
+      -dbName string
+            The name of the synapse database (default "synapse")
+      -dbPassword string
+            The password to authorize the postgres user. Can be omitted to be prompted when run
+      -dbPort int
+            The port to access postgres on (default 5432)
+      -dbUsername string
+            The username to access postgres with (default "synapse")
+      -serverName string
+            The name of your homeserver (eg: matrix.org) (default "localhost")
+    ```
+    Assuming the media repository, postgres database, and synapse are all on the same host, the command to run would look something like: `bin/import_synapse -serverName myserver.com -dbUsername my_database_user -dbName synapse`
+4. Wait for the import to complete. The script will automatically deduplicate media.
+5. Point traffic to the media repository
diff --git a/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go b/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go
index 1103202f..53ab80cd 100644
--- a/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go
+++ b/src/github.com/turt2live/matrix-media-repo/cmd/import_synapse/main.go
@@ -1,37 +1,127 @@
 package main
 
 import (
+	"context"
+	"errors"
 	"flag"
 	"fmt"
+	"io"
+	"net/http"
+	"strconv"
 
+	"github.com/howeyc/gopass"
+	"github.com/sirupsen/logrus"
 	"github.com/turt2live/matrix-media-repo/config"
+	"github.com/turt2live/matrix-media-repo/logging"
+	"github.com/turt2live/matrix-media-repo/rcontext"
+	"github.com/turt2live/matrix-media-repo/services"
 	"github.com/turt2live/matrix-media-repo/storage"
 	"github.com/turt2live/matrix-media-repo/synapse"
 )
 
 func main() {
+	postgresHost := flag.String("dbHost", "localhost", "The IP or hostname of the postgresql server with the synapse database")
+	postgresPort := flag.Int("dbPort", 5432, "The port to access postgres on")
+	postgresUsername := flag.String("dbUsername", "synapse", "The username to access postgres with")
+	postgresPassword := flag.String("dbPassword", "", "The password to authorize the postgres user. Can be omitted to be prompted when run")
+	postgresDatabase := flag.String("dbName", "synapse", "The name of the synapse database")
+	baseUrl := flag.String("baseUrl", "http://localhost:8008", "The base URL to access your homeserver with")
+	serverName := flag.String("serverName", "localhost", "The name of your homeserver (eg: matrix.org)")
+	flag.Parse()
+
+	var realPsqlPassword string
+	if *postgresPassword == "" {
+		fmt.Printf("Postgres password: ")
+		pass, err := gopass.GetPasswd()
+		if err != nil {
+			panic(err)
+		}
+		realPsqlPassword = string(pass[:])
+	} else {
+		realPsqlPassword = *postgresPassword
+	}
+
 	c, err := config.ReadConfig()
 	if err != nil {
 		panic(err)
 	}
 
+	err = logging.Setup(c.General.LogDirectory)
+	if err != nil {
+		panic(err)
+	}
+
+	logrus.Info("Setting up for importing...")
+
 	db, err := storage.OpenDatabase(c.Database.Postgres)
 	if err != nil {
 		panic(err)
 	}
 
-	homeserverYamlPath := flag.String("homeserver", "homeserver.yaml", "Path to your homeserver.yaml")
-	moveFiles := flag.Bool("move", false, "If set, files will be moved instead of copied")
-	importRemote := flag.Bool("remote", false, "If set, remote media will also be imported")
-	flag.Parse()
+	connectionString := "postgres://" + *postgresUsername + ":" + realPsqlPassword + "@" + *postgresHost + ":" + strconv.Itoa(*postgresPort) + "/" + *postgresDatabase + "?sslmode=disable"
+	csApiUrl := *baseUrl
+	if csApiUrl[len(csApiUrl)-1:] == "/" {
+		csApiUrl = csApiUrl[:len(csApiUrl)-1]
+	}
+
+	logrus.Info("Connecting to synapse database...")
+	synDb, err := synapse.OpenDatabase(connectionString)
+	if err != nil {
+		panic(err)
+	}
 
-	hsConfig, err := synapse.ReadConfig(*homeserverYamlPath)
+	logrus.Info("Fetching all local media records from synapse...")
+	records, err := synDb.GetAllMedia()
 	if err != nil {
 		panic(err)
 	}
 
-	fmt.Println(*moveFiles)
-	fmt.Println(*importRemote)
-	fmt.Println(*db)
-	fmt.Println(hsConfig.GetConnectionString())
-}
\ No newline at end of file
+	logrus.Info(fmt.Sprintf("Downloading %d media records", len(records)))
+	ctx := context.TODO()
+	for i := 0; i < len(records); i++ {
+		percent := int((float32(i+1) / float32(len(records))) * 100)
+		record := records[i]
+
+		info := rcontext.RequestInfo{
+			Log: logrus.WithFields(logrus.Fields{
+				"mediaId": record.MediaId,
+			}),
+			Context: ctx,
+			Db:      *db,
+			Config:  c,
+		}
+
+		info.Log.Info(fmt.Sprintf("Downloading %s (%d/%d %d%%)", record.MediaId, i+1, len(records), percent))
+
+		body, err := downloadMedia(csApiUrl, *serverName, record.MediaId)
+		if err != nil {
+			info.Log.Error(err.Error())
+			continue
+		}
+
+		svc := services.CreateMediaService(info)
+
+		_, err = svc.StoreMedia(body, record.ContentType, record.UploadName, record.UserId, *serverName, record.MediaId)
+		if err != nil {
+			info.Log.Error(err.Error())
+			continue
+		}
+
+		body.Close()
+	}
+
+	logrus.Info("Import completed")
+}
+
+// downloadMedia fetches mediaId from the homeserver's r0 media download endpoint. The caller must close the returned body.
+func downloadMedia(baseUrl string, serverName string, mediaId string) (io.ReadCloser, error) {
+	resp, err := http.Get(baseUrl + "/_matrix/media/r0/download/" + serverName + "/" + mediaId)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close() // the body is not handed to the caller on this path, so release it here
+		return nil, errors.New("received status code " + strconv.Itoa(resp.StatusCode))
+	}
+	return resp.Body, nil
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/synapse/db.go b/src/github.com/turt2live/matrix-media-repo/synapse/db.go
new file mode 100644
index 00000000..dd4c2945
--- /dev/null
+++ b/src/github.com/turt2live/matrix-media-repo/synapse/db.go
@@ -0,0 +1,74 @@
+package synapse
+
+import (
+	"database/sql"
+
+	_ "github.com/lib/pq" // postgres driver
+)
+
+const selectLocalMedia = "SELECT media_id, media_type, media_length, created_ts, upload_name, user_id FROM local_media_repository;" // column order must match the Scan call in GetAllMedia
+
+type SynapseDatabase struct { // handle on a synapse postgres database, built by OpenDatabase
+	db         *sql.DB    // database handle returned by sql.Open with the "postgres" driver
+	statements statements // statements prepared against db
+}
+
+type statements struct { // prepared statements held by SynapseDatabase
+	selectLocalMedia *sql.Stmt // prepared form of the selectLocalMedia query
+}
+
+// OpenDatabase opens the synapse postgres database at connectionString and prepares the statements used to read it.
+func OpenDatabase(connectionString string) (*SynapseDatabase, error) {
+	var d SynapseDatabase
+	var err error
+
+	if d.db, err = sql.Open("postgres", connectionString); err != nil {
+		return nil, err
+	}
+	if d.statements.selectLocalMedia, err = d.db.Prepare(selectLocalMedia); err != nil {
+		d.db.Close() // do not leak the handle when the statement cannot be prepared
+		return nil, err
+	}
+	return &d, nil
+}
+
+// GetAllMedia returns every record in synapse's local_media_repository table. NULL columns become zero values.
+func (d *SynapseDatabase) GetAllMedia() ([]LocalMedia, error) {
+	rows, err := d.statements.selectLocalMedia.Query()
+	if err != nil {
+		return nil, err // sql.ErrNoRows is only produced by Row.Scan (QueryRow); an empty table just yields no iterations
+	}
+	defer rows.Close()
+	var results []LocalMedia
+	for rows.Next() {
+		var mediaId sql.NullString
+		var contentType sql.NullString
+		var sizeBytes sql.NullInt64
+		var createdTs sql.NullInt64
+		var uploadName sql.NullString
+		var userId sql.NullString
+		err = rows.Scan(
+			&mediaId,
+			&contentType,
+			&sizeBytes,
+			&createdTs,
+			&uploadName,
+			&userId,
+		)
+		if err != nil {
+			return nil, err
+		}
+		results = append(results, LocalMedia{
+			MediaId:     mediaId.String,
+			ContentType: contentType.String,
+			SizeBytes:   sizeBytes.Int64,
+			CreatedTs:   createdTs.Int64,
+			UploadName:  uploadName.String,
+			UserId:      userId.String,
+		})
+	}
+	if err = rows.Err(); err != nil { // Next also stops on a failed fetch, not only at the end of the result set
+		return nil, err
+	}
+	return results, nil
+}
diff --git a/src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go b/src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go
deleted file mode 100644
index 8dfc9fb9..00000000
--- a/src/github.com/turt2live/matrix-media-repo/synapse/homeserver_yaml.go
+++ /dev/null
@@ -1,50 +0,0 @@
-package synapse
-
-import (
-	"errors"
-	"io/ioutil"
-	"os"
-
-	"gopkg.in/yaml.v2"
-)
-
-type HomeserverYaml struct {
-	Database struct {
-		Name string `yaml:"name"`
-		Arguments struct {
-			Username string `yaml:"user"`
-			Password string `yaml:"password"`
-			Database string `yaml:"database"`
-			Hostname string `yaml:"host"`
-		} `yaml:"args"`
-	} `yaml:"database"`
-}
-
-func ReadConfig(yamlPath string) (HomeserverYaml, error) {
-	c := &HomeserverYaml{}
-
-	f, err := os.Open(yamlPath)
-	if err != nil {
-		return *c, err
-	}
-
-	defer f.Close()
-
-	buffer, err := ioutil.ReadAll(f)
-	err = yaml.Unmarshal(buffer, &c)
-	if err != nil {
-		return *c, err
-	}
-
-	return *c, nil
-}
-
-func (c *HomeserverYaml) GetConnectionString() (string) {
-	if c.Database.Name != "psycopg2" {
-		panic(errors.New("homeserver database must be postgres"))
-	}
-
-	a := c.Database.Arguments
-
-	return "postgres://" + a.Username + ":" + a.Password + "@" + a.Hostname + "/" + a.Database + "?sslmode=disable"
-}
\ No newline at end of file
diff --git a/src/github.com/turt2live/matrix-media-repo/synapse/local_media.go b/src/github.com/turt2live/matrix-media-repo/synapse/local_media.go
new file mode 100644
index 00000000..3eb4626b
--- /dev/null
+++ b/src/github.com/turt2live/matrix-media-repo/synapse/local_media.go
@@ -0,0 +1,10 @@
+package synapse
+
+type LocalMedia struct { // one row of synapse's local_media_repository table, as read by GetAllMedia
+	MediaId     string // media_id column
+	ContentType string // media_type column
+	SizeBytes   int64  // media_length column
+	CreatedTs   int64  // created_ts column
+	UploadName  string // upload_name column
+	UserId      string // user_id column
+}
diff --git a/vendor/manifest b/vendor/manifest
index aa4e0bf1..ba0bb32c 100644
--- a/vendor/manifest
+++ b/vendor/manifest
@@ -38,6 +38,12 @@
 			"revision": "2d5fef06b891c971b14aa6f71ca5ab6c03a36e0e",
 			"branch": "master"
 		},
+		{
+			"importpath": "github.com/howeyc/gopass",
+			"repository": "https://github.com/howeyc/gopass",
+			"revision": "bf9dde6d0d2c004a008c27aaee91170c786f6db8",
+			"branch": "master"
+		},
 		{
 			"importpath": "github.com/lestrrat/go-file-rotatelogs",
 			"repository": "https://github.com/lestrrat/go-file-rotatelogs",
-- 
GitLab