Golang : Merge video(OpenCV) and audio(PortAudio) into an mp4 file




The previous Golang tutorials covered how to capture image frames from a web camera with OpenCV and how to record voice from a microphone with PortAudio. In this tutorial, we will learn how to merge the video stream and the audio stream into a single file with the help of ffmpeg.

In the code example that follows, we will launch a goroutine to capture the audio stream from the microphone while simultaneously capturing the video stream from the camera. The audio and video data will be encoded separately into two files and then merged into a single mp4 file with ffmpeg. AFAIK, there is no single-stream way of capturing both audio and video from a web camera in Golang yet. This is the best solution that I can come up with. It is based on a similar program written in Python - https://github.com/JRodrigoF/AVrecordeR/blob/master/AVrecordeR.py

Make sure you have OpenCV and PortAudio installed on your computer before building the code below.

Here you go!


 package main

 /*
  #include <stdio.h>
  #include <unistd.h>
  #include <termios.h>
  // getch blocks until one byte is available on stdin and returns it, without
  // waiting for Enter and without echoing the key. The terminal settings that
  // were active on entry are restored before returning. Returns 0 when no
  // byte could be read.
  //
  // Only "//" comments are used here: this C code lives inside a Go block
  // comment (the cgo preamble), so a C block comment would end it early.
  char getch(){
      char ch = 0;
      struct termios saved = {0};
      struct termios raw;
      fflush(stdout);
      if( tcgetattr(0, &saved) < 0 ) perror("tcgetattr()");
      // Work on a copy so the original settings can be put back untouched.
      raw = saved;
      raw.c_lflag &= ~ICANON; // deliver input byte by byte, not line by line
      raw.c_lflag &= ~ECHO;   // do not print the pressed key
      raw.c_cc[VMIN] = 1;     // read() returns as soon as one byte arrives
      raw.c_cc[VTIME] = 0;    // no inter-byte timeout
      if( tcsetattr(0, TCSANOW, &raw) < 0 ) perror("tcsetattr ~ICANON");
      if( read(0, &ch, 1) < 0 ) perror("read()");
      // Restore whatever mode the terminal was in before the call instead of
      // unconditionally forcing ICANON|ECHO on.
      if( tcsetattr(0, TCSADRAIN, &saved) < 0 ) perror("tcsetattr restore");
      return ch;
  }
 */
 import "C"

 // stackoverflow.com/questions/14094190/golang-function-similar-to-getchar

 import (
 "fmt"
 "github.com/gordonklaus/portaudio"
 "github.com/lazywei/go-opencv/opencv"
 wave "github.com/zenwerk/go-wave"
 "math"
 "math/rand"
 "os"
 "os/exec"
 "path"
 "runtime"
 "time"
 )

 // Global state shared between videoFeed, audioFeed and the key-press
 // handler started in main.
 //
 // NOTE(review): these are plain variables that are read and written from
 // more than one goroutine with no mutex, channel or atomic guarding them —
 // confirm whether synchronization is needed.

 var (
 // stopAudio is polled by audioFeed's loop; main sets it to true to end audio capture.
 stopAudio = false
 // stopVideo is polled by videoFeed's loop; main sets it to true to end video capture.
 stopVideo = false

 // win is the preview window; videoFeed replaces it with a real window and main destroys it.
 win = new(opencv.Window)
 // waveWriter is the WAV writer; audioFeed replaces it and writes samples to it, main closes it.
 waveWriter = new(wave.Writer)
 // frameCounter starts at 1 and is increased by the value WriteFrame returns in videoFeed;
 // main divides it by the elapsed time to get the recorded FPS.
 frameCounter = 1
 // startTime is taken when the package variables are initialized (program start),
 // not when the first frame is captured.
 startTime = time.Now()
 )

 // ticker holds the spinner characters that audioFeed prints (one picked at
 // random per audio buffer) to show that recording is in progress.
 // From good old DOS days.
 var ticker = []string{
 "-",
 "\\",
 "/",
 "|",
 }

 // errCheck aborts the program by panicking with err when err is non-nil;
 // a nil err is a no-op.
 func errCheck(err error) {
 	if err == nil {
 		return
 	}
 	panic(err)
 }

 // videoFeed grabs frames from the auto-detected camera, shows each one in a
 // preview window (stored in the package-level win) and appends it to
 // videoFileName. It keeps going until the package-level stopVideo flag is
 // true, then releases the video writer and the camera.
 //
 // frameCounter is increased by whatever WriteFrame returns for each frame.
 // The function panics when the camera or the video writer cannot be opened.
 func videoFeed(videoFileName string) {
 	// activate webCamera
 	webCamera := opencv.NewCameraCapture(opencv.CV_CAP_ANY) // autodetect
 	if webCamera == nil {
 		panic("Unable to open camera")
 	}
 	defer webCamera.Release()

 	// !! NEED TO CHECK IF YOUR OS HAS THE CODECS INSTALLED BEFORE SELECTING THE CODEC !!
 	// !! OTHERWISE, YOU WILL GET A VERY SMALL & CORRUPT VIDEO FILE !!

 	// see http://www.fourcc.org/codecs.php for other possible combinations
 	// opencv.FOURCC('p', 'i', 'm', '1') // MPEG-1 codec
 	// opencv.FOURCC('m', 'j', 'p', 'g') // motion-jpeg codec
 	// opencv.FOURCC('m', 'p', '4', '2') // MPEG-4.2 codec
 	// opencv.FOURCC('d', 'i', 'v', '3') // MPEG-4.3 codec
 	// opencv.FOURCC('m', 'p', '4', 'v') // MPEG-4 codec
 	// opencv.FOURCC('u', '2', '6', '3') // H263 codec
 	// opencv.FOURCC('i', '2', '6', '3') // H263I codec
 	// opencv.FOURCC('f', 'l', 'v', '1') // FLV1 codec

 	// codec := opencv.CV_FOURCC_PROMPT // Windows only. Prompt for codec selection
 	// codec := int(webCamera.GetProperty(opencv.CV_CAP_PROP_FOURCC)) -- won't work on my Mac

 	codec := int(opencv.FOURCC('m', 'p', '4', 'v')) // must be lower case, upper case will screw the file...

 	fps := float32(25) // frame rate declared to the writer: 25 frames per second
 	frameWidth := int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_WIDTH))
 	frameHeight := int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_HEIGHT))
 	isColor := 1 // 0 = false(grayscale), 1 = true -- for Windows only I think

 	// !! IMPORTANT : the writer must use the same frameWidth and frameHeight
 	// as the captured frames, otherwise you get this error message -
 	// [OpenCV Error: Assertion failed (dst.data == dst0.data) in cvCvtColor,]
 	// You can still resize the frames before writing them if you want.

 	// for more info, read http://docs.opencv.org/trunk/dd/d9e/classcv_1_1VideoWriter.html
 	videoFileWriter := opencv.NewVideoWriter(videoFileName, codec, fps, frameWidth, frameHeight, isColor)
 	if videoFileWriter == nil {
 		panic("Unable to open video writer for " + videoFileName)
 	}
 	// Release the writer when recording ends so the video file is finished
 	// properly; deferred calls run in reverse order, so the writer is
 	// released before the camera.
 	defer videoFileWriter.Release()

 	win = opencv.NewWindow("Go-OpenCV recording video and audio to file")

 	// Stop touching the camera and the writer as soon as stopVideo is set
 	// (the original guard against a segmentation fault during shutdown).
 	for !stopVideo {
 		if !webCamera.GrabFrame() {
 			continue
 		}

 		imgFrame := webCamera.RetrieveFrame(1)
 		if imgFrame == nil {
 			continue
 		}

 		win.ShowImage(imgFrame)

 		// save frame to video file
 		frameCounter += videoFileWriter.WriteFrame(imgFrame)
 	}
 }

 // audioFeed records from the default PortAudio input device into
 // audioFileName as an 8-bit WAV file. It keeps reading buffers until the
 // package-level stopAudio flag is true, then stops and closes the stream and
 // terminates PortAudio.
 //
 // The WAV writer is stored in the package-level waveWriter; audioFeed does
 // not close it (main does). Any setup or write error panics via errCheck.
 func audioFeed(audioFileName string) {
 	// for Audio
 	waveFile, err := os.Create(audioFileName)
 	errCheck(err)

 	// sampleBuf is the buffer PortAudio fills on every Read.
 	sampleBuf := make([]byte, 1024)

 	// init PortAudio
 	errCheck(portaudio.Initialize())
 	defer portaudio.Terminate()

 	// use our default input parameters
 	inputDevice, err := portaudio.DefaultInputDevice()
 	errCheck(err)

 	fmt.Println("Using [", inputDevice.Name, "] to record.")

 	inputChannels := inputDevice.MaxInputChannels
 	outputChannels := inputDevice.MaxOutputChannels
 	// must match your microphone's sample rate - otherwise you will get Input Overflowed error
 	sampleRate := inputDevice.DefaultSampleRate

 	audioStream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, sampleRate, len(sampleBuf), sampleBuf)
 	errCheck(err)
 	defer audioStream.Close()

 	// setup Wave file writer; waveFile is handed over to it as its output.
 	// NOTE(review): waveFile is presumably closed by waveWriter.Close() — confirm.
 	param := wave.WriterParam{
 		Out:           waveFile,
 		Channel:       inputChannels,
 		SampleRate:    int(sampleRate),
 		BitsPerSample: 8, // if 16, change to WriteSample16()
 	}

 	waveWriter, err = wave.NewWriter(param)
 	errCheck(err)

 	// start reading from microphone
 	errCheck(audioStream.Start())

 	// Stop touching the stream as soon as stopAudio is set (the original
 	// guard against a segmentation fault during shutdown).
 	for !stopAudio {
 		// An input overflow only means earlier samples were dropped, so keep
 		// recording; any other read error is fatal.
 		if err := audioStream.Read(); err != nil && err != portaudio.InputOverflowed {
 			errCheck(err)
 		}

 		// rand.Intn(n) returns a value in [0, n), so pass the full length;
 		// len(ticker)-1 would never select the last glyph.
 		fmt.Printf("\rRecording video and audio now. Wave or say something to your microphone! [%v]", ticker[rand.Intn(len(ticker))])

 		// write to wave file
 		_, err := waveWriter.Write(sampleBuf) // WriteSample16 for 16 bits
 		errCheck(err)
 	}

 	errCheck(audioStream.Stop())
 }

 func main() {

 // maximize CPU usage for multi threading
 runtime.GOMAXPROCS(runtime.NumCPU())

 // for ticker
 rand.Seed(time.Now().UnixNano())

 if len(os.Args) != 2 {
 fmt.Printf("Usage : %s <save to filename>\n", os.Args[0])
 os.Exit(0)
 }

 videoFileName := os.Args[1]

 // remove extension and add .wav
 ext := path.Ext(videoFileName)
 audioFileName := videoFileName[0:len(videoFileName)-len(ext)] + ".wav"

 fmt.Println("Press ESC key to to quit")
 // go routine to intercept ESC key
 // since opencv.WaitKey does not work on my Mac :(

 go func() {

 key := C.getch()
 fmt.Println()
 fmt.Println("Cleaning up ...")
 if key == 27 {

 // stop audio first
 stopAudio = true
 waveWriter.Close()
 fmt.Println("Audio closed.")

 // now we merge the video and audio files

 fmt.Println("Number of frames : ", frameCounter)

 // remember.... we are calculating Frames Per [Second]
 elapsedTime := time.Since(startTime).Seconds()
 fmt.Println("Elapsed time in seconds : ", elapsedTime)
 recordedFPS := float64(frameCounter) / elapsedTime
 fmt.Println("Recorded FPS : ", recordedFPS)

 // stop video
 stopVideo = true
 win.Destroy()
 fmt.Println("Video closed.")

 // now we merge the video and audio files

 // taken from https://github.com/JRodrigoF/AVrecordeR/blob/master/AVrecordeR.py
 // If the fps rate was higher/lower than expected, re-encode it to the expected

 if math.Abs(recordedFPS-6) >= 0.01 {

 fmt.Println("Re-encoding")
 recordedFPSstring := fmt.Sprintf("%0.2f", recordedFPS)
 fmt.Println("recordedFPS string : ", recordedFPSstring)

 // use -loglevel quiet to suppress ffmpeg errors and exit status 1
 // which will kill our program

 os.Remove("temp_" + videoFileName)

 // having white spaces in exec.Command() 
 // will return non-zero exit status. Golang does not have shell=true yet
 //cmd := exec.Command("ffmpeg", "-r "+recordedFPSstring+" -i "+videoFileName+" -r 6 temp_"+videoFileName)

 cmd := exec.Command("ffmpeg", "-r", recordedFPSstring, "-i", videoFileName, "-r", "6", "temp_"+videoFileName)
 err := cmd.Run()
 fmt.Println("Error from ffmpeg re-encoding : ", err)

 fmt.Println("Muxing")
 //cmd = exec.Command("ffmpeg", "-i temp_"+videoFileName+" -i "+audioFileName+" final_"+videoFileName)
 cmd = exec.Command("ffmpeg", "-i", "temp_"+videoFileName, "-i", audioFileName, "final_"+videoFileName)
 ffmpegdata, err := cmd.Output()
 fmt.Println("Error from ffmpeg muxing : ", err)
 fmt.Println(ffmpegdata)

 } else {
 fmt.Println("Normal recording and muxing")

 cmd := exec.Command("ffmpeg", "-i", videoFileName+".tmp", "-i", audioFileName, "final_"+videoFileName)
 fmt.Println(cmd)
 cmd.Run()
 //errCheck(err)
 }

 fmt.Println("Play final_", videoFileName, "with a video player to see the result!")
 os.Exit(0)
 }
 }()

 // almost... almost simultaneously...
 go audioFeed(audioFileName)
 videoFeed(videoFileName)

 // loop forever until escape key is pressed
 select {}
 }

NOTES

I reckon there are a few better solutions out there.

One is at https://ffmpeg.org/ffmpeg.html#toc-Video-and-Audio-grabbing

and another is a pure Golang solution at https://github.com/3d0c/gmf/blob/master/examples/encoding_mp4.go. However, my attempts to run it generated these error messages:

[mp4 @ 0x600da00] Using AVStream.codec.timebase as a timebase hint to the muxer is deprecated. Set AVStream.timebase instead. [mp4 @ 0x600da00] Using AVStream.codec to pass codec parameters to muxers is deprecated, use AVStream.codecpar instead. [mp4 @ 0x600da00] Using AVStream.codec.timebase as a timebase hint to the muxer is deprecated. Set AVStream.timebase instead. [mp4 @ 0x600da00] Using AVStream.codec to pass codec parameters to muxers is deprecated, use AVStream.codecpar instead.

Happy coding!

References:

https://socketloop.com/tutorials/golang-record-voice-audio-from-microphone-to-wav-file

https://github.com/JRodrigoF/AVrecordeR/blob/master/AVrecordeR.py

https://ffmpeg.org/ffmpeg.html

https://socketloop.com/tutorials/golang-save-webcamera-frames-to-video-file





By Adam Ng

If you gained some knowledge or the information here solved your programming problem, please consider donating to the less fortunate or to a charity that you like. Apart from donating, planting trees, volunteering or reducing your carbon footprint would be great too.


Advertisement