Golang : Merge video(OpenCV) and audio(PortAudio) into a mp4 file
The previous Golang tutorials covered how to capture image frames from a web camera with OpenCV and how to record voice from a microphone with PortAudio. In this tutorial, we will learn how to merge a video stream and an audio stream into a single file with the help of ffmpeg.
In the code example that follows, we will launch a goroutine to capture the audio stream from the microphone while simultaneously capturing the video stream from the camera. The audio and video data will be encoded separately into two files and then merged into a single mp4 file with ffmpeg. AFAIK, there is no single-stream way of capturing both audio and video from a web camera in Golang yet. This is the best solution that I can come up with. It is based on a similar program written in Python - https://github.com/JRodrigoF/AVrecordeR/blob/master/AVrecordeR.py
Make sure you have OpenCV and PortAudio installed on your computer before building the code below.
Here you go!
package main
/*
#include <stdio.h>
#include <unistd.h>
#include <termios.h>
// getch reads one key press from standard input (fd 0) without waiting for
// Enter and without echoing the key, then puts the terminal back the way it
// was found.
//
// Returns the byte that was read, or 0 when nothing could be read.
//
// NOTE: this function lives inside the cgo preamble, which is itself a Go
// block comment, so only line comments may be used here.
char getch(){
	char ch = 0;
	struct termios saved = {0};
	struct termios raw;

	fflush(stdout);

	// Without the current settings there is nothing safe to modify or to
	// restore (stdin is probably not a terminal), so fall back to a plain read.
	if( tcgetattr(0, &saved) < 0 ) {
		perror("tcgetattr()");
		if( read(0, &ch, 1) < 0 ) perror("read()");
		return ch;
	}

	// Work on a copy so the untouched original can be restored afterwards.
	raw = saved;
	raw.c_lflag &= ~ICANON;   // deliver bytes immediately, not line by line
	raw.c_lflag &= ~ECHO;     // do not print the key that was pressed
	raw.c_cc[VMIN] = 1;       // block until one byte is available
	raw.c_cc[VTIME] = 0;      // no read timeout
	if( tcsetattr(0, TCSANOW, &raw) < 0 ) perror("tcsetattr ~ICANON");

	if( read(0, &ch, 1) < 0 ) perror("read()");

	// Restore exactly what was found instead of forcing ICANON|ECHO on, so a
	// terminal that was already in a non-default mode is left unchanged.
	if( tcsetattr(0, TCSADRAIN, &saved) < 0 ) perror("tcsetattr restore");
	return ch;
}
*/
import "C"
// stackoverflow.com/questions/14094190/golang-function-similar-to-getchar
import (
"fmt"
"github.com/gordonklaus/portaudio"
"github.com/lazywei/go-opencv/opencv"
wave "github.com/zenwerk/go-wave"
"math"
"math/rand"
"os"
"os/exec"
"path"
"runtime"
"time"
)
// Package-level state shared between main, videoFeed and audioFeed.
//
// NOTE(review): stopAudio and stopVideo are written by the key-handling
// goroutine in main and polled by the capture loops without any
// synchronisation.
var (
	// stopAudio asks the audioFeed loop to finish.
	stopAudio bool
	// stopVideo asks the videoFeed loop to finish.
	stopVideo bool

	// win is the preview window; videoFeed replaces it with the real window.
	win = new(opencv.Window)
	// waveWriter is the WAV encoder; audioFeed replaces it once the output
	// file has been created.
	waveWriter = new(wave.Writer)

	// frameCounter accumulates the values returned by WriteFrame. It starts
	// at 1, not 0.
	frameCounter int = 1
	// startTime is taken at program start and is used to compute the
	// recorded frames per second.
	startTime time.Time = time.Now()
)
// ticker holds the glyphs shown next to the "recording" message as a
// progress indicator, in the style of old DOS programs.
var ticker = []string{"-", `\`, "/", "|"}
// errCheck aborts the program by panicking with err when err is non-nil.
// A nil err is a no-op.
func errCheck(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// videoFeed captures frames from the auto-detected web camera, shows each one
// in a preview window and appends it to the video file videoFileName. It runs
// until the package-level stopVideo flag becomes true.
//
// The value returned by every WriteFrame call is added to the package-level
// frameCounter, and the package-level win is replaced with the preview window.
//
// It panics when the camera or the video writer cannot be opened.
func videoFeed(videoFileName string) {
	// activate webCamera
	webCamera := opencv.NewCameraCapture(opencv.CV_CAP_ANY) // autodetect
	if webCamera == nil {
		panic("Unable to open camera")
	}
	defer webCamera.Release()

	// !! NEED TO CHECK IF YOUR OS HAS THE CODECS INSTALLED BEFORE SELECTING THE CODEC !!
	// !! OTHERWISE, YOU WILL GET A VERY SMALL & CORRUPT VIDEO FILE !!
	// see http://www.fourcc.org/codecs.php for other possible combinations
	// opencv.FOURCC('p', 'i', 'm', '1') // MPEG-1 codec
	// opencv.FOURCC('m', 'j', 'p', 'g') // motion-jpeg codec
	// opencv.FOURCC('m', 'p', '4', '2') // MPEG-4.2 codec
	// opencv.FOURCC('d', 'i', 'v', '3') // MPEG-4.3 codec
	// opencv.FOURCC('m', 'p', '4', 'v') // MPEG-4 codec
	// opencv.FOURCC('u', '2', '6', '3') // H263 codec
	// opencv.FOURCC('i', '2', '6', '3') // H263I codec
	// opencv.FOURCC('f', 'l', 'v', '1') // FLV1 codec
	// codec := opencv.CV_FOURCC_PROMPT // Windows only. Prompt for codec selection
	// codec := int(webCamera.GetProperty(opencv.CV_CAP_PROP_FOURCC)) -- won't work on my Mac
	codec := int(opencv.FOURCC('m', 'p', '4', 'v')) // must be lower case, upper case will screw the file...

	fps := float32(25) // 25 frames per second
	//fps := float32(webCamera.GetProperty(opencv.CV_CAP_PROP_POS_FRAMES))

	frameWidth := int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_WIDTH))
	frameHeight := int(webCamera.GetProperty(opencv.CV_CAP_PROP_FRAME_HEIGHT))

	isColor := 1 // 0 = false(grayscale), 1 = true -- for Windows only I think

	// !! IMPORTANT : The frame width and height given to the writer must be the
	// same as those of the frames coming from the camera,
	// otherwise you will get this error message -
	// [OpenCV Error: Assertion failed (dst.data == dst0.data) in cvCvtColor,]
	// Just a note, you can still resize the frames before writing to file if you want.
	// For more info, read http://docs.opencv.org/trunk/dd/d9e/classcv_1_1VideoWriter.html
	videoFileWriter := opencv.NewVideoWriter(videoFileName, codec, fps, frameWidth, frameHeight, isColor)
	if videoFileWriter == nil {
		panic("Unable to open video writer for " + videoFileName)
	}
	// Release the writer when the loop ends; per the OpenCV documentation this
	// is what finishes writing the video file. Deferred calls run in reverse
	// order, so the writer is released before the camera.
	defer videoFileWriter.Release()

	win = opencv.NewWindow("Go-OpenCV recording video and audio to file")

	// stopVideo is checked before every frame: the window is destroyed right
	// after the flag is raised, and drawing into it afterwards would crash.
	for !stopVideo {
		if !webCamera.GrabFrame() {
			continue
		}

		imgFrame := webCamera.RetrieveFrame(1)
		if imgFrame == nil {
			continue
		}

		win.ShowImage(imgFrame)

		// save frame to video file
		frameCounter += videoFileWriter.WriteFrame(imgFrame)

		// 0.16 delay = 6 fps
		//time.Sleep(time.Duration(16/100) * time.Second)
	}
}
// audioFeed records from the default input device into the WAV file
// audioFileName until the package-level stopAudio flag becomes true.
//
// The package-level waveWriter is replaced with the writer for the new file
// so that the caller can close it. Any set-up or write failure panics via
// errCheck.
func audioFeed(audioFileName string) {
	// for Audio
	waveFile, err := os.Create(audioFileName)
	errCheck(err)

	// Sample buffer shared with PortAudio: Read fills it, Write drains it.
	framesPerBuffer := make([]byte, 1024)

	// init PortAudio; nothing below can work if this fails
	errCheck(portaudio.Initialize())

	// use our default input parameters
	inputDevice, err := portaudio.DefaultInputDevice()
	errCheck(err)
	fmt.Println("Using [", inputDevice.Name, "] to record.")

	inputChannels := inputDevice.MaxInputChannels
	outputChannels := inputDevice.MaxOutputChannels

	// must match your microphone's sample rate - otherwise you will get Input Overflowed error
	sampleRate := inputDevice.DefaultSampleRate

	audioStream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, sampleRate, len(framesPerBuffer), framesPerBuffer)
	errCheck(err)

	// setup Wave file writer
	param := wave.WriterParam{
		Out:           waveFile,
		Channel:       inputChannels,
		SampleRate:    int(sampleRate),
		BitsPerSample: 8, // if 16, change to WriteSample16()
	}
	waveWriter, err = wave.NewWriter(param)
	errCheck(err)

	// start reading from microphone
	errCheck(audioStream.Start())

	// stopAudio is checked before every buffer because the writer is closed
	// right after the flag is raised.
	for !stopAudio {
		// NOTE(review): the error from Read is not checked, presumably so that
		// a transient input overflow does not abort the recording - confirm.
		audioStream.Read()

		// Intn(n) returns a value in [0, n), so passing len(ticker) makes
		// every glyph selectable, including the last one.
		fmt.Printf("\rRecording video and audio now. Wave or say something to your microphone! [%v]", ticker[rand.Intn(len(ticker))])

		// write to wave file
		_, err := waveWriter.Write(framesPerBuffer) // WriteSample16 for 16 bits
		errCheck(err)
	}

	errCheck(audioStream.Stop())
	audioStream.Close()
	portaudio.Terminate()
}
// main records video and audio at the same time into the file named by the
// single command-line argument (plus a .wav file with the same base name).
// It keeps recording until ESC is pressed, then merges both files with ffmpeg
// into "final_<name>" and exits.
func main() {
	// maximize CPU usage for multi threading
	runtime.GOMAXPROCS(runtime.NumCPU())

	// for ticker
	rand.Seed(time.Now().UnixNano())

	if len(os.Args) != 2 {
		fmt.Printf("Usage : %s <save to filename>\n", os.Args[0])
		os.Exit(0)
	}

	videoFileName := os.Args[1]

	// remove extension and add .wav
	ext := path.Ext(videoFileName)
	audioFileName := videoFileName[0:len(videoFileName)-len(ext)] + ".wav"

	fmt.Println("Press ESC key to quit")

	// go routine to intercept ESC key
	// since opencv.WaitKey does not work on my Mac :(
	go func() {
		// Keep reading keys until ESC (27) arrives. Reading only once would
		// make the recording impossible to stop after any other key press.
		for {
			key := C.getch()
			if key == 27 {
				break
			}
			if key == 0 {
				// getch yields 0 when nothing could be read (for example stdin
				// is closed); stop listening rather than spinning forever.
				return
			}
		}

		fmt.Println()
		fmt.Println("Cleaning up ...")

		// stop audio first
		// NOTE(review): audioFeed may still be inside a write when the writer
		// is closed here; there is no hand-shake between the two goroutines.
		stopAudio = true
		waveWriter.Close()
		fmt.Println("Audio closed.")

		fmt.Println("Number of frames : ", frameCounter)

		// remember.... we are calculating Frames Per [Second]
		elapsedTime := time.Since(startTime).Seconds()
		fmt.Println("Elapsed time in seconds : ", elapsedTime)

		recordedFPS := float64(frameCounter) / elapsedTime
		fmt.Println("Recorded FPS : ", recordedFPS)

		// stop video
		stopVideo = true
		win.Destroy()
		fmt.Println("Video closed.")

		mergeRecording(videoFileName, audioFileName, recordedFPS)
		os.Exit(0)
	}()

	// almost... almost simultaneously...
	go audioFeed(audioFileName)
	videoFeed(videoFileName)

	// videoFeed has returned; wait here until the key goroutine calls os.Exit
	select {}
}

// mergeRecording muxes the recorded video and audio into "final_<video name>".
// When the measured frame rate differs from the expected one, the video is
// first re-encoded into "temp_<video name>".
//
// Approach taken from https://github.com/JRodrigoF/AVrecordeR/blob/master/AVrecordeR.py
func mergeRecording(videoFileName, audioFileName string, recordedFPS float64) {
	// Frame rate the merged video is expected to have.
	const expectedFPS = 6

	finalName := prefixFileName("final_", videoFileName)

	// If the fps rate was higher/lower than expected, re-encode it to the expected
	if math.Abs(recordedFPS-expectedFPS) >= 0.01 {
		fmt.Println("Re-encoding")

		recordedFPSstring := fmt.Sprintf("%0.2f", recordedFPS)
		fmt.Println("recordedFPS string : ", recordedFPSstring)

		tempName := prefixFileName("temp_", videoFileName)

		// A left-over temp file would stop ffmpeg from writing; the error is
		// ignored because the file usually does not exist.
		os.Remove(tempName)

		// Every ffmpeg argument must be its own string: exec.Command does not
		// go through a shell, so "-r 6" as a single argument is rejected.
		runFFmpeg("re-encoding", "-r", recordedFPSstring, "-i", videoFileName, "-r", "6", tempName)

		fmt.Println("Muxing")
		runFFmpeg("muxing", "-i", tempName, "-i", audioFileName, finalName)
	} else {
		fmt.Println("Normal recording and muxing")
		// Mux the recorded video itself; no ".tmp" file is ever produced.
		runFFmpeg("muxing", "-i", videoFileName, "-i", audioFileName, finalName)
	}

	fmt.Printf("Play %s with a video player to see the result!\n", finalName)
}

// prefixFileName inserts prefix in front of the file-name part of name and
// leaves any leading directory untouched: ("temp_", "out/a.mp4") gives
// "out/temp_a.mp4".
func prefixFileName(prefix, name string) string {
	dir, file := path.Split(name)
	return dir + prefix + file
}

// runFFmpeg runs ffmpeg with args and, when it fails, prints the error and
// everything ffmpeg wrote. step names the stage for the error message.
func runFFmpeg(step string, args ...string) {
	out, err := exec.Command("ffmpeg", args...).CombinedOutput()
	if err != nil {
		fmt.Println("Error from ffmpeg "+step+" : ", err)
		fmt.Println(string(out))
	}
}
NOTES
I reckon there are a few better solutions out there.
One is at https://ffmpeg.org/ffmpeg.html#toc-Video-and-Audio-grabbing
and another is a pure Golang solution at https://github.com/3d0c/gmf/blob/master/examples/encoding_mp4.go. However, my attempts to run it generated these error messages:
[mp4 @ 0x600da00] Using AVStream.codec.timebase as a timebase hint to the muxer is deprecated. Set AVStream.timebase instead. [mp4 @ 0x600da00] Using AVStream.codec to pass codec parameters to muxers is deprecated, use AVStream.codecpar instead. [mp4 @ 0x600da00] Using AVStream.codec.timebase as a timebase hint to the muxer is deprecated. Set AVStream.timebase instead. [mp4 @ 0x600da00] Using AVStream.codec to pass codec parameters to muxers is deprecated, use AVStream.codecpar instead.
Happy coding!
References:
https://socketloop.com/tutorials/golang-record-voice-audio-from-microphone-to-wav-file
https://github.com/JRodrigoF/AVrecordeR/blob/master/AVrecordeR.py
https://ffmpeg.org/ffmpeg.html
https://socketloop.com/tutorials/golang-save-webcamera-frames-to-video-file
By Adam Ng
If you gained some knowledge, or the information here solved your programming problem, please consider donating to the less fortunate or to a charity that you like. Apart from donating, planting trees, volunteering or reducing your carbon footprint would be great too.
Advertisement
Tutorials
+12.4k Golang : Drop cookie to visitor's browser and http.SetCookie() example
+6.9k Golang : Dealing with postal or zip code example
+11.2k Golang : Format numbers to nearest thousands such as kilos millions billions and trillions
+51.9k Golang : How to get struct field and value by name
+35.8k Golang : How to split or chunking a file to smaller pieces?
+13.6k Golang : How to check if a file is hidden?
+6k Golang : Test input string for unicode example
+11.5k Golang : convert(cast) float to string
+9k Golang : How to get garbage collection data?
+8.9k Golang : How to check if a string with spaces in between is numeric?
+8.5k Android Studio : Image button and button example
+7.3k Gogland : Single File versus Go Application Run Configurations