diff --git a/CHANGELOG.md b/CHANGELOG.md
index ea5b930ed27071d305c9fff9d2afdb7011ab10d3..d141fa569e24e44a05dd118a28df7b81d00f66d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ## [Unreleased]
 
+### Added
+
+* Added a `-verify` mode to imports to determine if large imports were successful.
+
 ### Changed
 
 * `Content-Disposition` of plain text files now defaults to `inline`.
diff --git a/README.md b/README.md
index d31c7655e3510db2d6ee7c0142e6a6ed6556e289..0409108e6669925221e6798eaefc795dc612c5e2 100644
--- a/README.md
+++ b/README.md
@@ -120,4 +120,6 @@ Usage of gdpr_import:
         The directory for where the entity's exported files are (default "./gdpr-data")
   -migrations string
        The absolute path for the migrations folder (default "./migrations")
+  -verify
+        If set, no media will be imported; the media will instead be checked to see if they've already been imported
 ```
diff --git a/cmd/gdpr_import/main.go b/cmd/gdpr_import/main.go
index eee00c453a17cf1e288355c3a1cce0866a40659e..76f6adc006395227750843c51e842b1bf94e38fb 100644
--- a/cmd/gdpr_import/main.go
+++ b/cmd/gdpr_import/main.go
@@ -22,6 +22,7 @@ func main() {
 	configPath := flag.String("config", "media-repo.yaml", "The path to the configuration")
 	migrationsPath := flag.String("migrations", config.DefaultMigrationsPath, "The absolute path for the migrations folder")
 	filesDir := flag.String("directory", "./gdpr-data", "The directory for where the entity's exported files are")
+	verifyMode := flag.Bool("verify", false, "If set, no media will be imported; the media will instead be checked to see if they've already been imported")
 	flag.Parse()
 
 	// Override config path with config for Docker users
@@ -72,7 +73,6 @@ func main() {
 		}
 	}
 
-	logrus.Info("Starting import...")
 	ctx := rcontext.Initial().LogWithFields(logrus.Fields{"flagDir": *filesDir})
 
 	f, err := os.Open(files[manifestIdx])
@@ -80,6 +80,27 @@ func main() {
 		panic(err)
 	}
 	defer f.Close()
+
+	if *verifyMode {
+		found, expected, missingIds, err := data_controller.VerifyImport(f, ctx)
+		if err != nil {
+			panic(err)
+		}
+		logrus.Info("Known imported media IDs: ", found)
+		logrus.Info("Expected media IDs: ", expected)
+
+		if len(missingIds) > 0 {
+			for _, mxc := range missingIds {
+				logrus.Error("Expected media ID but was not present: ", mxc)
+			}
+			logrus.Warn("Not all media is present. See logs for details.")
+			os.Exit(1)
+		}
+		logrus.Info("All media present!")
+		return // exit 0
+	}
+
+	logrus.Info("Starting import...")
 	task, importId, err := data_controller.StartImport(f, ctx)
 	if err != nil {
 		panic(err)
diff --git a/controllers/data_controller/import_controller.go b/controllers/data_controller/import_controller.go
index de7c788e51fa58b31a96cdb0afe871ab88669d34..0da9f6481878477eeb9d70684efc82a30e6a01da 100644
--- a/controllers/data_controller/import_controller.go
+++ b/controllers/data_controller/import_controller.go
@@ -30,6 +30,44 @@ type importUpdate struct {
 
 var openImports = &sync.Map{} // importId => updateChan
 
+func VerifyImport(data io.Reader, ctx rcontext.RequestContext) (int, int, []string, error) {
+	// Process the archive (sync, so we can error)
+	// We do this before anything else because if the archive is invalid then we shouldn't
+	// even bother checking the database.
+	results, err := processArchive(data)
+	if err != nil {
+		return 0, 0, nil, err
+	}
+
+	manifestFile, ok := results["manifest.json"]
+	if !ok {
+		return 0, 0, nil, errors.New("no manifest provided in data package")
+	}
+
+	archiveManifest := &Manifest{}
+	err = json.Unmarshal(manifestFile.Bytes(), archiveManifest)
+	if err != nil {
+		return 0, 0, nil, err
+	}
+
+	expected := 0
+	found := 0
+	missing := make([]string, 0)
+	db := storage.GetDatabase().GetMediaStore(ctx)
+	for mxc, r := range archiveManifest.Media {
+		ctx.Log.Info("Checking file: ", mxc)
+		expected++
+		_, err = db.Get(r.Origin, r.MediaId)
+		if err == nil {
+			found++
+		} else {
+			missing = append(missing, mxc)
+		}
+	}
+
+	return found, expected, missing, nil
+}
+
 func StartImport(data io.Reader, ctx rcontext.RequestContext) (*types.BackgroundTask, string, error) {
 	// Prepare the first update for the import (sync, so we can error)
 	// We do this before anything else because if the archive is invalid then we shouldn't