From f7546a732c1e00ea24eec73a6ceed5e1e151630a Mon Sep 17 00:00:00 2001
From: Travis Ralston <travpc@gmail.com>
Date: Sat, 15 Aug 2020 23:04:35 -0600
Subject: [PATCH] Include audio information in /info output

---
 CHANGELOG.md                   |  1 +
 api/unstable/info.go           | 47 ++++++++++++++++++++++++++++------
 thumbnailing/i/01-factories.go |  4 +++
 thumbnailing/i/flac.go         | 10 ++++++++
 thumbnailing/i/mp3.go          | 41 ++++++++++++++++++++++++-----
 thumbnailing/i/ogg.go          | 10 ++++++++
 thumbnailing/i/wav.go          | 10 ++++++++
 thumbnailing/m/audio_info.go   | 12 +++++++++
 thumbnailing/thumbnail.go      | 15 +++++++++++
 9 files changed, 136 insertions(+), 14 deletions(-)
 create mode 100644 thumbnailing/m/audio_info.go

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aa19d99e..19136c43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 * Added a new tool, `export_synapse_for_import`, which can be used to do an offline import from Synapse.
   * After running this tool, use the `gdpr_import` tool to bring the export into the media repo.
 * Added thumbnailing support for some audio waveforms (MP3, WAV, OGG, and FLAC).
+* Added audio metadata (duration, etc.) to the unstable `/info` endpoint. Aligns with [MSC2380](https://github.com/matrix-org/matrix-doc/pull/2380).
 
 ### Fixed
 
diff --git a/api/unstable/info.go b/api/unstable/info.go
index 086f47ae..e880aec2 100644
--- a/api/unstable/info.go
+++ b/api/unstable/info.go
@@ -1,9 +1,12 @@
 package unstable
 
 import (
+	"bytes"
 	"database/sql"
+	"io/ioutil"
 	"net/http"
 	"strconv"
+	"strings"
 
 	"github.com/disintegration/imaging"
 	"github.com/gorilla/mux"
@@ -13,6 +16,9 @@ import (
 	"github.com/turt2live/matrix-media-repo/common/rcontext"
 	"github.com/turt2live/matrix-media-repo/controllers/download_controller"
 	"github.com/turt2live/matrix-media-repo/storage"
+	"github.com/turt2live/matrix-media-repo/thumbnailing"
+	"github.com/turt2live/matrix-media-repo/thumbnailing/i"
+	"github.com/turt2live/matrix-media-repo/util"
 	"github.com/turt2live/matrix-media-repo/util/cleanup"
 )
 
@@ -27,13 +33,17 @@ type mediaInfoThumbnail struct {
 }
 
 type MediaInfoResponse struct {
-	ContentUri  string                `json:"content_uri"`
-	ContentType string                `json:"content_type"`
-	Width       int                   `json:"width,omitempty"`
-	Height      int                   `json:"height,omitempty"`
-	Size        int64                 `json:"size"`
-	Hashes      mediaInfoHashes       `json:"hashes"`
-	Thumbnails  []*mediaInfoThumbnail `json:"thumbnails,omitempty"`
+	ContentUri      string                `json:"content_uri"`
+	ContentType     string                `json:"content_type"`
+	Width           int                   `json:"width,omitempty"`
+	Height          int                   `json:"height,omitempty"`
+	Size            int64                 `json:"size"`
+	Hashes          mediaInfoHashes       `json:"hashes"`
+	Thumbnails      []*mediaInfoThumbnail `json:"thumbnails,omitempty"`
+	DurationSeconds float64               `json:"duration,omitempty"`
+	NumTotalSamples int                   `json:"num_total_samples,omitempty"`
+	KeySamples      [][2]float64          `json:"key_samples,omitempty"`
+	NumChannels     int                   `json:"num_channels,omitempty"`
 }
 
 func MediaInfo(r *http.Request, rctx rcontext.RequestContext, user api.UserInfo) interface{} {
@@ -72,6 +82,12 @@ func MediaInfo(r *http.Request, rctx rcontext.RequestContext, user api.UserInfo)
 	}
 	defer cleanup.DumpAndCloseStream(streamedMedia.Stream)
 
+	b, err := ioutil.ReadAll(streamedMedia.Stream)
+	if err != nil {
+		rctx.Log.Error("Unexpected error processing media: " + err.Error())
+		return api.InternalServerError("Unexpected Error")
+	}
+
 	response := &MediaInfoResponse{
 		ContentUri:  streamedMedia.KnownMedia.MxcUri(),
 		ContentType: streamedMedia.KnownMedia.ContentType,
@@ -81,7 +97,7 @@ func MediaInfo(r *http.Request, rctx rcontext.RequestContext, user api.UserInfo)
 		},
 	}
 
-	img, err := imaging.Decode(streamedMedia.Stream)
+	img, err := imaging.Decode(bytes.NewBuffer(b))
 	if err == nil {
 		response.Width = img.Bounds().Max.X
 		response.Height = img.Bounds().Max.Y
@@ -106,5 +122,20 @@ func MediaInfo(r *http.Request, rctx rcontext.RequestContext, user api.UserInfo)
 		response.Thumbnails = infoThumbs
 	}
 
+	if strings.HasPrefix(response.ContentType, "audio/") {
+		generator, err := thumbnailing.GetGenerator(util.ByteCloser(b), response.ContentType, false)
+		if err == nil {
+			if audiogenerator, ok := generator.(i.AudioGenerator); ok {
+				audioInfo, err := audiogenerator.GetAudioData(b, 768, rctx)
+				if err == nil {
+					response.KeySamples = audioInfo.KeySamples
+					response.NumChannels = audioInfo.Channels
+					response.DurationSeconds = audioInfo.Duration.Seconds()
+					response.NumTotalSamples = audioInfo.TotalSamples
+				}
+			}
+		}
+	}
+
 	return response
 }
diff --git a/thumbnailing/i/01-factories.go b/thumbnailing/i/01-factories.go
index ac60eadb..6ae368bf 100644
--- a/thumbnailing/i/01-factories.go
+++ b/thumbnailing/i/01-factories.go
@@ -12,6 +12,10 @@ type Generator interface {
 	GenerateThumbnail(b []byte, contentType string, width int, height int, method string, animated bool, ctx rcontext.RequestContext) (*m.Thumbnail, error)
 }
 
+type AudioGenerator interface { // optional capability; callers discover it by type-asserting a Generator
+	GetAudioData(b []byte, nKeys int, ctx rcontext.RequestContext) (*m.AudioInfo, error) // nKeys is the number of key samples to aim for
+}
+
 var generators = make([]Generator, 0)
 
 func GetGenerator(img []byte, contentType string, needsAnimation bool) Generator {
diff --git a/thumbnailing/i/flac.go b/thumbnailing/i/flac.go
index 39ce1ff4..c7f73995 100644
--- a/thumbnailing/i/flac.go
+++ b/thumbnailing/i/flac.go
@@ -34,6 +34,16 @@ func (d flacGenerator) GenerateThumbnail(b []byte, contentType string, width int
 	return mp3Generator{}.GenerateFromStream(audio, format, width, height)
 }
 
+// GetAudioData decodes b as FLAC and samples it via mp3Generator.GetDataFromStream; ctx is unused.
+func (d flacGenerator) GetAudioData(b []byte, nKeys int, ctx rcontext.RequestContext) (*m.AudioInfo, error) {
+	stream, streamFormat, decodeErr := flac.Decode(util.ByteCloser(b))
+	if decodeErr != nil {
+		return nil, errors.New("flac: error decoding audio: " + decodeErr.Error())
+	}
+	defer stream.Close()
+	return mp3Generator{}.GetDataFromStream(stream, streamFormat, nKeys)
+}
+
 func init() {
 	generators = append(generators, flacGenerator{})
 }
diff --git a/thumbnailing/i/mp3.go b/thumbnailing/i/mp3.go
index b7086c68..c25c3d77 100644
--- a/thumbnailing/i/mp3.go
+++ b/thumbnailing/i/mp3.go
@@ -42,7 +42,17 @@ func (d mp3Generator) GenerateThumbnail(b []byte, contentType string, width int,
 	return d.GenerateFromStream(audio, format, width, height)
 }
 
-func (d mp3Generator) GenerateFromStream(audio beep.StreamSeekCloser, format beep.Format, width int, height int) (*m.Thumbnail, error) {
+// GetAudioData decodes b as MP3 and samples it via GetDataFromStream; ctx is unused.
+func (d mp3Generator) GetAudioData(b []byte, nKeys int, ctx rcontext.RequestContext) (*m.AudioInfo, error) {
+	stream, streamFormat, decodeErr := mp3.Decode(util.ByteCloser(b))
+	if decodeErr != nil {
+		return nil, errors.New("mp3: error decoding audio: " + decodeErr.Error())
+	}
+	defer stream.Close()
+	return d.GetDataFromStream(stream, streamFormat, nKeys)
+}
+
+func (d mp3Generator) GetDataFromStream(audio beep.StreamSeekCloser, format beep.Format, nKeys int) (*m.AudioInfo, error) {
 	allSamples := make([][2]float64, 0)
 
 	moreSamples := true
@@ -63,15 +73,34 @@ func (d mp3Generator) GenerateFromStream(audio beep.StreamSeekCloser, format bee
 		}
 	}
 
-	// Figure out a resolution that will work for us
-	everyNth := int(math.Round(float64(len(allSamples)) / float64(width)))
-	averagedSamples := make([]float64, 0)
+	downsampled := make([][2]float64, 0)
+	everyNth := int(math.Max(1, math.Round(float64(len(allSamples))/float64(nKeys)))) // clamp to 1: short streams round to 0 and i%0 below would panic
 	for i, s := range allSamples {
 		if i%everyNth != 0 {
 			continue
 		}
+		downsampled = append(downsampled, s)
+	}
+
+	return &m.AudioInfo{
+		Duration:     format.SampleRate.D(len(allSamples)),
+		Channels:     format.NumChannels,
+		TotalSamples: len(allSamples),
+		KeySamples:   downsampled,
+	}, nil
+}
+
+func (d mp3Generator) GenerateFromStream(audio beep.StreamSeekCloser, format beep.Format, width int, height int) (*m.Thumbnail, error) {
+	info, err := d.GetDataFromStream(audio, format, width)
+	if err != nil {
+		return nil, errors.New("beep-visual: error sampling audio: " + err.Error())
+	}
+
+	// Average out all the samples
+	averagedSamples := make([]float64, 0)
+	for _, s := range info.KeySamples {
 		avg := (s[0] + s[1]) / 2
-		if format.NumChannels == 1 {
+		if info.Channels == 1 {
 			avg = s[0]
 		}
 		averagedSamples = append(averagedSamples, avg)
@@ -104,7 +133,7 @@ func (d mp3Generator) GenerateFromStream(audio beep.StreamSeekCloser, format bee
 
 	// Encode to a png
 	imgData := &bytes.Buffer{}
-	err := imaging.Encode(imgData, img, imaging.PNG)
+	err = imaging.Encode(imgData, img, imaging.PNG)
 	if err != nil {
 		return nil, errors.New("beep-visual: error encoding thumbnail: " + err.Error())
 	}
diff --git a/thumbnailing/i/ogg.go b/thumbnailing/i/ogg.go
index bba26a83..dec2d969 100644
--- a/thumbnailing/i/ogg.go
+++ b/thumbnailing/i/ogg.go
@@ -34,6 +34,16 @@ func (d oggGenerator) GenerateThumbnail(b []byte, contentType string, width int,
 	return mp3Generator{}.GenerateFromStream(audio, format, width, height)
 }
 
+// GetAudioData decodes b as Vorbis and samples it via mp3Generator.GetDataFromStream; ctx is unused.
+func (d oggGenerator) GetAudioData(b []byte, nKeys int, ctx rcontext.RequestContext) (*m.AudioInfo, error) {
+	stream, streamFormat, decodeErr := vorbis.Decode(util.ByteCloser(b))
+	if decodeErr != nil {
+		return nil, errors.New("ogg: error decoding audio: " + decodeErr.Error())
+	}
+	defer stream.Close()
+	return mp3Generator{}.GetDataFromStream(stream, streamFormat, nKeys)
+}
+
 func init() {
 	generators = append(generators, oggGenerator{})
 }
diff --git a/thumbnailing/i/wav.go b/thumbnailing/i/wav.go
index c513af95..ac34f214 100644
--- a/thumbnailing/i/wav.go
+++ b/thumbnailing/i/wav.go
@@ -34,6 +34,16 @@ func (d wavGenerator) GenerateThumbnail(b []byte, contentType string, width int,
 	return mp3Generator{}.GenerateFromStream(audio, format, width, height)
 }
 
+// GetAudioData decodes b as WAV and samples it via mp3Generator.GetDataFromStream; ctx is unused.
+func (d wavGenerator) GetAudioData(b []byte, nKeys int, ctx rcontext.RequestContext) (*m.AudioInfo, error) {
+	stream, streamFormat, decodeErr := wav.Decode(util.ByteCloser(b))
+	if decodeErr != nil {
+		return nil, errors.New("wav: error decoding audio: " + decodeErr.Error())
+	}
+	defer stream.Close()
+	return mp3Generator{}.GetDataFromStream(stream, streamFormat, nKeys)
+}
+
 func init() {
 	generators = append(generators, wavGenerator{})
 }
diff --git a/thumbnailing/m/audio_info.go b/thumbnailing/m/audio_info.go
new file mode 100644
index 00000000..a194cd4d
--- /dev/null
+++ b/thumbnailing/m/audio_info.go
@@ -0,0 +1,12 @@
+package m
+
+import (
+	"time"
+)
+
+type AudioInfo struct {
+	KeySamples   [][2]float64  // every Nth collected sample, kept in stream order
+	Duration     time.Duration // time spanned by TotalSamples samples at the stream's sample rate
+	TotalSamples int           // number of samples collected before downsampling
+	Channels     int           // channel count taken from the decoder's reported format
+}
diff --git a/thumbnailing/thumbnail.go b/thumbnailing/thumbnail.go
index 357cfe6b..422374de 100644
--- a/thumbnailing/thumbnail.go
+++ b/thumbnailing/thumbnail.go
@@ -42,3 +42,18 @@ func GenerateThumbnail(imgStream io.ReadCloser, contentType string, width int, h
 
 	return generator.GenerateThumbnail(b, contentType, width, height, method, animated, ctx)
 }
+
+// GetGenerator drains imgStream and returns the generator that i.GetGenerator
+// selects for the bytes and contentType, or ErrUnsupported when it selects none.
+func GetGenerator(imgStream io.ReadCloser, contentType string, animated bool) (i.Generator, error) {
+	defer cleanup.DumpAndCloseStream(imgStream)
+
+	data, readErr := ioutil.ReadAll(imgStream)
+	if readErr != nil {
+		return nil, readErr
+	}
+	if found := i.GetGenerator(data, contentType, animated); found != nil {
+		return found, nil
+	}
+	return nil, ErrUnsupported
+}
-- 
GitLab