From d42bb9b28864f063dc254e96cb49942c0dfa816a Mon Sep 17 00:00:00 2001 From: Javier Feliz Date: Fri, 7 Feb 2025 12:20:35 -0700 Subject: [PATCH] Add opusframes package with encoding/decoding (#13) Closes #12 Added a library to encode videos from yt-dlp into our custom `.of` file format that can be easily read and directly streamed to discord. Co-authored-by: Xander Bazzi Reviewed-on: https://www.gitgud.foo/thegrind/papibot/pulls/13 Reviewed-by: xbazzi Co-authored-by: Javier Feliz Co-committed-by: Javier Feliz --- .gitignore | 1 + main.go | 80 +++++---------------- pkg/opusframes/opusframes.go | 130 +++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 63 deletions(-) create mode 100644 pkg/opusframes/opusframes.go diff --git a/.gitignore b/.gitignore index 149a5d0..74fd59d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ testfile.wav play.opus output.opus +*.of .env *.wav *.opus diff --git a/main.go b/main.go index 3e10a43..28bd7ed 100644 --- a/main.go +++ b/main.go @@ -2,7 +2,6 @@ package main import ( "fmt" - "io" "log" "os" "os/exec" @@ -10,6 +9,7 @@ import ( "syscall" "time" + "git.thegrind.dev/thegrind/papibot/pkg/opusframes" dg "github.com/bwmarrin/discordgo" "github.com/javif89/dotenv" ) @@ -108,79 +108,28 @@ func playCommand(s *dg.Session, i *dg.InteractionCreate) { } func playOnVoiceChannel(voiceChannel *dg.VoiceConnection) { - log.Println("Starting ffmpeg stream") - // I got the original implementation from: - // https://github.com/nhooyr/botatouille/blob/7e1cd9d5a8d517fd43fd11599b2a62bf832a5c96/cmd/botatouille/music/music.go#L62-L104 - // after hours of searching. 
- ffmpeg := exec.Command( - "ffmpeg", - "-i", "vid.webm", - "-hide_banner", - "-loglevel", "quiet", - "-f", "data", - "-map", "0:a", - "-ar", "48k", - "-ac", "2", - "-acodec", "libopus", - "-b:a", "128k", - "pipe:1") - - ffmpegOut, err := ffmpeg.StdoutPipe() + log.Println("Decoding") + start := time.Now() + frames, err := opusframes.Decode("vid.of") if err != nil { - log.Fatal(err) + log.Println("Decoding error: ", err) + return } + duration := time.Since(start) + log.Printf("Decoding took: %s", duration) - err = ffmpeg.Start() - if err != nil { - log.Fatal(err) - } - - packets := [][]byte{} - - startTime := time.Now() - - bytes_sent := 0 - for { - // I read in the RFC that frames will not be bigger than this size - p := make([]byte, 960) - n, err := ffmpegOut.Read(p) - bytes_sent = bytes_sent + n - - if err != nil { - log.Printf("Bytes: %d", n) - if err == io.EOF { - log.Println("Done streaming") - break - } - log.Fatal(err) - } - - packets = append(packets, p[:n]) - } - - elapsedTime := time.Since(startTime) - - log.Printf("bytes sent = %d", bytes_sent) - log.Printf("Took %s seconds to run", elapsedTime) + // for _, f := range frames { + // log.Printf("Got frame. 
Size: %d", len(f)) + // } voiceChannel.Speaking(true) time.Sleep(time.Second * 2) log.Println("Playing sound") - startTime = time.Now() - bytes_sent = 0 - for _, p := range packets { + for _, p := range frames { log.Printf("Sending packet: %d bytes", len(p)) - bytes_sent += len(p) - voiceChannel.OpusSend <- p } - - elapsedTime = time.Since(startTime) - log.Printf("Packets = %d", len(packets)) - log.Printf("Network bytes sent = %d", bytes_sent) - log.Printf("Took %s seconds to run", elapsedTime) - log.Println("Ended stream") } func handleSlashCommand(s *dg.Session, i *dg.InteractionCreate) { @@ -233,5 +182,10 @@ func downloadVideo(url string) error { err := cmd.Run() log.Println("Downloaded") + start := time.Now() + opusframes.Encode("vid.webm", "vid.of") + duration := time.Since(start) + log.Printf("Encoding took: %s", duration) + return err } \ No newline at end of file diff --git a/pkg/opusframes/opusframes.go b/pkg/opusframes/opusframes.go new file mode 100644 index 0000000..b4f76af --- /dev/null +++ b/pkg/opusframes/opusframes.go @@ -0,0 +1,130 @@ +package opusframes + +// Package to encode/decode .of files. +// The .of file format will allow us to cache/preprocess +// files from the queue and save it in a format that we +// can quickly read and send to discord. +// The format spec is very simple: +// frame size: 2 bytes +// frame data: Up to 1275 bytes +// data will be stored in little endian format +import ( + "bytes" + "encoding/binary" + "errors" + "io" + "log" + "os" + "os/exec" +) + +type ErrFFMPEG error +type ErrEncoding error + +// TODO: Allow streaming frames as they are encoded instead +// of having to wait for the whole file. +// We can probably do this by taking an io.Reader as input +// instead of just a file path. Then we return an io.Writer +// and allow the user to either stream each encoded frame +// or save it to a file. We could also make the Decode +// function take an io.Reader so these functions can +// just be piped into each other. 
// TODO: Can we pack the frames into chunks of 960? I wonder
// if that would make playback smoother or just wreck the
// audio. We can try it at some point.

const (
	// frameHeaderSize is the width in bytes of the little-endian uint16
	// length prefix stored in front of every frame.
	frameHeaderSize = 2

	// readChunkSize is the largest number of bytes requested from ffmpeg in
	// a single read; every non-empty read becomes one frame in the output.
	readChunkSize = 960
)

// Encode runs ffmpeg on the file at input, re-encoding its audio stream with
// libopus (48 kHz, 2 channels, 128k bitrate), and stores the result at output
// in the .of format: a sequence of records, each made of a little-endian
// uint16 length followed by that many bytes of data.
//
// One record is written per non-empty read from ffmpeg's stdout, so a record
// never holds more than readChunkSize bytes and zero-length records are never
// produced.
// NOTE(review): this presumably relies on each pipe read lining up with one
// packet written by ffmpeg's "data" muxer -- confirm.
//
// The output file is created (or truncated) only after ffmpeg has exited
// successfully. Any failure to start ffmpeg, to read its output, a non-zero
// exit status, or a failure to write the file is returned to the caller and
// no output file is written.
func Encode(input, output string) error {
	ffmpeg := exec.Command(
		"ffmpeg",
		"-i", input,
		"-hide_banner",
		"-loglevel", "quiet",
		"-f", "data",
		"-map", "0:a",
		"-ar", "48k",
		"-ac", "2",
		"-acodec", "libopus",
		"-b:a", "128k",
		"pipe:1")

	ffmpegOut, err := ffmpeg.StdoutPipe()
	if err != nil {
		return err
	}

	if err := ffmpeg.Start(); err != nil {
		return err
	}

	encoded, err := packFrames(ffmpegOut)
	if err != nil {
		// Nobody is draining the pipe any more, so ffmpeg could block on a
		// write forever; kill it before reaping. Both results are ignored
		// on purpose: the read error is the one worth reporting.
		_ = ffmpeg.Process.Kill()
		_ = ffmpeg.Wait()
		return err
	}

	// Wait must only be called once every read from the pipe has finished.
	// It reaps the child and surfaces a non-zero exit status.
	if err := ffmpeg.Wait(); err != nil {
		return err
	}

	if err := os.WriteFile(output, encoded, 0644); err != nil {
		return err
	}

	log.Printf("Encoded %s to %s (%d bytes)", input, output, len(encoded))
	return nil
}

// packFrames drains r and returns its content as length-prefixed records,
// one record per non-empty read.
func packFrames(r io.Reader) ([]byte, error) {
	var (
		packed bytes.Buffer
		header [frameHeaderSize]byte
	)
	// The chunk buffer is reused across reads; bytes.Buffer copies what it
	// is given, so earlier records are not affected.
	chunk := make([]byte, readChunkSize)

	for {
		n, err := r.Read(chunk)
		if n > 0 {
			binary.LittleEndian.PutUint16(header[:], uint16(n))
			// bytes.Buffer.Write always returns a nil error.
			packed.Write(header[:])
			packed.Write(chunk[:n])
		}

		if err != nil {
			if errors.Is(err, io.EOF) {
				return packed.Bytes(), nil
			}
			return nil, err
		}
	}
}

// ErrMalformedFile is returned by Decode when the input ends in the middle
// of a length prefix or in the middle of a frame.
var ErrMalformedFile = errors.New("malformed file")

// TODO: Allow streaming frames as they are decoded instead
// of having to decode the whole file.

// Decode reads the .of file at input and returns its frames in file order.
//
// An empty file yields an empty, non-nil slice. Zero-length records are
// skipped, so the result never contains an empty frame. Errors from reading
// the file are returned as-is; a file that is cut short yields
// ErrMalformedFile. The returned slice is nil whenever the error is non-nil.
func Decode(input string) ([][]byte, error) {
	data, err := os.ReadFile(input)
	if err != nil {
		return nil, err
	}

	return splitFrames(data)
}

// splitFrames cuts data, a sequence of length-prefixed records, into frames.
// The frames share data's backing array instead of being copied.
func splitFrames(data []byte) ([][]byte, error) {
	frames := [][]byte{}

	for len(data) > 0 {
		if len(data) < frameHeaderSize {
			return nil, ErrMalformedFile
		}
		size := int(binary.LittleEndian.Uint16(data))
		data = data[frameHeaderSize:]

		if size > len(data) {
			return nil, ErrMalformedFile
		}

		if size > 0 {
			// Cap the frame at its own length so an append by the caller
			// reallocates instead of overwriting the following record.
			frames = append(frames, data[:size:size])
		}
		data = data[size:]
	}

	return frames, nil
}