diff --git a/common/config/config.go b/common/config/config.go index 8a4320eb8e39f59bdd604b8eeb0c998076573bfc..7f07f6637d99ad2e7e681dce8eef5c7fcd2df140 100644 --- a/common/config/config.go +++ b/common/config/config.go @@ -39,6 +39,12 @@ type DatabaseConfig struct { Pool *DbPoolConfig `yaml:"pool"` } +type ArchivingConfig struct { + Enabled bool `yaml:"enabled"` + SelfService bool `yaml:"selfService"` + TargetBytesPerPart int64 `yaml:"targetBytesPerPart"` +} + type UploadsConfig struct { StoragePaths []string `yaml:"storagePaths,flow"` // deprecated MaxSizeBytes int64 `yaml:"maxBytes"` @@ -143,6 +149,7 @@ type MediaRepoConfig struct { Admins []string `yaml:"admins,flow"` Database *DatabaseConfig `yaml:"database"` DataStores []DatastoreConfig `yaml:"datastores"` + Archiving *ArchivingConfig `yaml:"archiving"` Uploads *UploadsConfig `yaml:"uploads"` Downloads *DownloadsConfig `yaml:"downloads"` Thumbnails *ThumbnailsConfig `yaml:"thumbnails"` @@ -234,6 +241,11 @@ func NewDefaultConfig() *MediaRepoConfig { Homeservers: []*HomeserverConfig{}, Admins: []string{}, DataStores: []DatastoreConfig{}, + Archiving: &ArchivingConfig{ + Enabled: true, + SelfService: false, + TargetBytesPerPart: 209715200, // 200mb + }, Uploads: &UploadsConfig{ MaxSizeBytes: 104857600, // 100mb MinSizeBytes: 100, diff --git a/config.sample.yaml b/config.sample.yaml index 45a1d5c6949cec01c1189cd9609f99c52a10c86e..351da2efb61573ff86d6c95584cf73bb4b0efbd0 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -65,17 +65,22 @@ sharedSecretAuth: datastores: - type: file enabled: false # Enable this to set up data storage. - # Datastores can be split into three areas when handling uploads: thumbnails, remote_media, - # and local_media. Media is still de-duplicated across all datastores (local content which - # duplicates remote content will re-use the remote content's location). 
This option is useful - # if your datastore is becoming very large, or if you want faster storage for a particular - # kind of media. + # Datastores can be split into many areas when handling uploads. Media is still de-duplicated + # across all datastores (local content which duplicates remote content will re-use the remote + # content's location). This option is useful if your datastore is becoming very large, or if + # you want faster storage for a particular kind of media. + # + # The kinds available are: + # thumbnails - Used to store thumbnails of media (local and remote). + # remote_media - Original copies of remote media (servers not configured by this repo). + # local_media - Original uploads for local media. + # archives - Archives of content (GDPR and similar requests). forKinds: ["thumbnails"] opts: path: /var/matrix/media - type: s3 enabled: false # Enable this to set up s3 uploads - forKinds: ["thumbnails", "remote_media", "local_media"] + forKinds: ["thumbnails", "remote_media", "local_media", "archives"] opts: # The s3 uploader needs a temporary location to buffer files to reduce memory usage on # small file uploads. If the file size is unknown, the file is written to this location @@ -88,6 +93,21 @@ datastores: ssl: true bucketName: "your-media-bucket" +# Options for controlling archives. Archives are exports of a particular user's content for +# the purpose of GDPR or moving media to a different server. +archiving: + # Whether archiving is enabled or not. Default enabled. + enabled: true + # If true, users can request a copy of their own data. By default, only repository administrators + # can request a copy. + selfService: false + # The number of bytes to target per archive before breaking up the files. This is independent + # of any file upload limits and will require a similar amount of memory when performing an export. 
+ # The file size is also a target, not a guarantee - it is possible to have files that are smaller + # or larger than the target. This is recommended to be approximately double the size of your + # file upload limit, provided there is enough memory available for the demand of exporting. + targetBytesPerPart: 209715200 # 200MB default + # The file upload settings for the media repository uploads: maxBytes: 104857600 # 100MB default, 0 to disable diff --git a/docs/admin.md b/docs/admin.md index 3da8650c8f28f2e00f862634f03e7a6c0166aaa0..032aa3d1903d4e18223f2a956356441945499256 100644 --- a/docs/admin.md +++ b/docs/admin.md @@ -310,3 +310,88 @@ The response is the status of the task: ``` **Note**: The `params` vary depending on the task. + +## Exporting/Importing data + +Exports (and therefore imports) are currently done on a per-user basis. This is primarily useful when moving users to new hosts or doing GDPR exports of user data. + +#### Exporting data for a user + +URL: `POST /_matrix/media/unstable/admin/user/<user ID>/export` + +The request body is: +```json +{ + "include_data": true, + "s3_urls": true +} +``` + +Both fields are optional; the values shown are their defaults. If `include_data` is false, only metadata will be returned by the export. When `s3_urls` is true, the metadata includes the s3 URL to the media if one is available. + +The response contains a task ID, and an export ID to put into the 'view export' URL: + +```json +{ + "export_id": "abcdef", + "task_id": 12 +} +``` + +**Note**: the `export_id` will be included in the task's `params`. + +**Note**: the `export_id` should be treated as a secret/authentication token as it allows someone to download other people's data. + +#### Viewing an export + +After the task has been completed, the `export_id` can be used to download the content. + +URL: `GET /_matrix/media/unstable/admin/export/<export ID>/view` + +The response will be a webpage for the user to interact with. 
From this page, the user can confirm that they have downloaded the export and then delete it. + +#### Downloading an export (for scripts) + +Similar to viewing an export, an export may be downloaded so that it can be imported later. + +Exports are split into several gzipped tar files, each of which needs to be downloaded individually. To get the list of files, call: + +`GET /_matrix/media/unstable/admin/export/<export ID>/metadata` + +which returns: + +```json +{ + "entity": "@travis:t2l.io", + "parts": [ + { + "index": 1, + "size": 1024000, + "name": "TravisR-part-1.tgz" + }, + { + "index": 2, + "size": 1024000, + "name": "TravisR-part-2.tgz" + } + ] +} +``` + +**Note**: the `name` shown is only an example; actual names may differ and should not be parsed. The `size` is in bytes. + +Then one can call the following to download each part: + +`GET /_matrix/media/unstable/admin/export/<export ID>/part/<index>` + +#### Deleting an export + +After the export has been downloaded, it can be deleted. Note that this endpoint can be called by the user from the "view export" page. + +`DELETE /_matrix/media/unstable/admin/export/<export ID>` + +The response is an empty JSON object if successful. + +#### Importing a previous export + +Not yet implemented.