Request for Help on Using Hardware Transcoding
Closed this issue · 0 comments
As a Go beginner, this issue has troubled me for three days. I have tried using both SoftwareScaleContext
and Filters
to convert frames into images. Despite my efforts to understand the code in the demo, I still haven't achieved the result I want. Currently, I can successfully transcode using SoftwareScaleContext
, but as soon as I do, the memory usage keeps increasing, and I can't figure out which part of the code is causing the problem. When trying the Filters
approach, I've spent the whole day without making any progress. Can someone help me fix the code below so that both methods work properly?
I'm currently using the go-astiav
version from the n6.1.1
branch. As for why I'm not using the latest version, it's because I'm working with ffmpeg-rockchip
, and the latest 7.0 branch of ffmpeg-rockchip
is not compatible with go-astiav
.
package main
import (
"errors"
"fmt"
"image"
"strconv"
"github.com/asticode/go-astiav"
log "github.com/darkit/slog"
"github.com/deepch/vdk/av"
"github.com/deepch/vdk/codec/h264parser"
"github.com/deepch/vdk/codec/h265parser"
)
type VideoDecoder struct {
fps int
width int
height int
decCodec *astiav.Codec
decCodecContext *astiav.CodecContext
hardwareDeviceContext *astiav.HardwareDeviceContext
hardwarePixelFormat astiav.PixelFormat
softwareFrame *astiav.Frame
hardwareFrame *astiav.Frame
rgbaFrame *astiav.Frame
avPkt *astiav.Packet
swsContext *astiav.SoftwareScaleContext
filterGraph *astiav.FilterGraph
buffersrcContext *astiav.FilterContext
buffersinkContext *astiav.FilterContext
}
func NewVideoDecoder(stream av.CodecData, useHardware bool) (*VideoDecoder, error) {
var (
codecID astiav.CodecID
extraData []byte
fps int
width int
height int
)
astiav.SetLogLevel(astiav.LogLevelDebug)
switch stream.Type() {
case av.H264:
codecID = astiav.CodecIDH264
h264 := stream.(h264parser.CodecData)
fps = h264.FPS()
extraData = h264.AVCDecoderConfRecordBytes()
width, height = h264.Width(), h264.Height()
case av.H265:
codecID = astiav.CodecIDH265
h265 := stream.(h265parser.CodecData)
fps = h265.FPS()
extraData = h265.AVCDecoderConfRecordBytes()
width, height = h265.Width(), h265.Height()
default:
return nil, fmt.Errorf("unsupported codec type: %v", stream.Type())
}
v := &VideoDecoder{
fps: fps,
width: width,
height: height,
}
// Initialize decoder
if useHardware {
v.decCodec = findHardwareDecoder(codecID)
} else {
v.decCodec = astiav.FindDecoder(codecID)
}
if v.decCodec == nil {
return nil, fmt.Errorf("decoder not found: %v", codecID)
}
log.Infof("Found hardware-accelerated decoder: %v", codecID.String())
v.decCodecContext = astiav.AllocCodecContext(v.decCodec)
if v.decCodecContext == nil {
return nil, fmt.Errorf("unable to allocate decoder context")
}
v.decCodecContext.SetWidth(width)
v.decCodecContext.SetHeight(height)
v.decCodecContext.SetExtraData(extraData)
if useHardware {
// Initialize hardware decoding
hardwareDeviceType := astiav.FindHardwareDeviceTypeByName("rkmpp")
//hardwareDeviceType := astiav.FindHardwareDeviceTypeByName(v.decCodec.String())
for _, p := range v.decCodec.HardwareConfigs() {
if p.MethodFlags().Has(astiav.CodecHardwareConfigMethodFlagHwDeviceCtx) && p.HardwareDeviceType() == hardwareDeviceType {
v.hardwarePixelFormat = p.PixelFormat()
break
}
}
v.hardwareDeviceContext, _ = astiav.CreateHardwareDeviceContext(hardwareDeviceType, "0", nil)
if v.hardwareDeviceContext != nil {
v.decCodecContext.SetHardwareDeviceContext(v.hardwareDeviceContext)
v.decCodecContext.SetPixelFormatCallback(func(pfs []astiav.PixelFormat) astiav.PixelFormat {
log.Infof("list: %v", pfs)
for _, pf := range pfs {
if pf == v.hardwarePixelFormat {
v.hardwarePixelFormat = pf
return pf
}
}
return astiav.PixelFormatNone
})
}
log.Info("Video codec info", "stream", stream.Type().String(), "decoder", v.decCodec.String(), "pixel", v.hardwarePixelFormat.String())
}
if err := v.decCodecContext.Open(v.decCodec, nil); err != nil {
return nil, fmt.Errorf("failed to open decoder: %v", err)
}
v.softwareFrame = astiav.AllocFrame()
v.hardwareFrame = astiav.AllocFrame()
v.rgbaFrame = astiav.AllocFrame()
v.avPkt = astiav.AllocPacket()
return v, nil
}
func (v *VideoDecoder) DecodePacketWithSoftwareScaleContext(pkt *av.Packet) (image.Image, error) {
defer v.hardwareFrame.Unref()
defer v.softwareFrame.Unref()
defer v.rgbaFrame.Unref()
defer v.avPkt.Unref()
_ = v.avPkt.FromData(pkt.Data)
if err := v.decCodecContext.SendPacket(v.avPkt); err != nil {
return nil, fmt.Errorf("failed to send packet to decoder: %v", err)
}
for {
err := v.decCodecContext.ReceiveFrame(v.hardwareFrame)
if err != nil {
if errors.Is(err, astiav.ErrEagain) || errors.Is(err, astiav.ErrEof) {
return nil, nil
}
return nil, fmt.Errorf("failed to receive frame: %v", err)
}
var frame *astiav.Frame
if v.hardwareFrame.PixelFormat() == v.hardwarePixelFormat {
if err := v.hardwareFrame.TransferHardwareData(v.softwareFrame); err != nil {
return nil, fmt.Errorf("failed to transfer hardware frame to software frame: %v", err)
}
v.softwareFrame.SetPts(v.hardwareFrame.Pts())
frame = v.softwareFrame
} else {
frame = v.hardwareFrame
}
if v.swsContext == nil {
v.swsContext, err = astiav.CreateSoftwareScaleContext(
frame.Width(), frame.Height(), frame.PixelFormat(),
frame.Width(), frame.Height(), astiav.PixelFormatRgba,
astiav.NewSoftwareScaleContextFlags(astiav.SoftwareScaleContextFlagBilinear),
)
if err != nil {
return nil, fmt.Errorf("failed to create software scaling context: %v", err)
}
}
if err := v.swsContext.ScaleFrame(frame, v.rgbaFrame); err != nil {
return nil, fmt.Errorf("failed to scale frame: %v", err)
}
img, err := v.rgbaFrame.Data().GuessImageFormat()
if err != nil {
return nil, fmt.Errorf("failed to guess image format: %v", err)
}
if err := v.rgbaFrame.Data().ToImage(img); err != nil {
return nil, fmt.Errorf("failed to convert frame to image: %v", err)
}
return img, nil
}
}
func (v *VideoDecoder) DecodePacketWithFilter(pkt *av.Packet) (image.Image, error) {
defer v.hardwareFrame.Unref()
defer v.softwareFrame.Unref()
defer v.rgbaFrame.Unref()
defer v.avPkt.Unref()
_ = v.avPkt.FromData(pkt.Data)
if err := v.decCodecContext.SendPacket(v.avPkt); err != nil {
return nil, fmt.Errorf("failed to send packet to decoder: %v", err)
}
for {
err := v.decCodecContext.ReceiveFrame(v.hardwareFrame)
if err != nil {
if errors.Is(err, astiav.ErrEagain) || errors.Is(err, astiav.ErrEof) {
return nil, nil
}
return nil, fmt.Errorf("failed to receive frame: %v", err)
}
var frame *astiav.Frame
if v.hardwareFrame.PixelFormat() == v.hardwarePixelFormat {
if err = v.hardwareFrame.TransferHardwareData(v.softwareFrame); err != nil {
return nil, fmt.Errorf("failed to transfer hardware frame to software frame: %v", err)
}
v.softwareFrame.SetPts(v.hardwareFrame.Pts())
frame = v.softwareFrame
} else {
frame = v.hardwareFrame
}
if v.filterGraph == nil {
v.filterGraph = astiav.AllocFilterGraph()
buffersrc := astiav.FindFilterByName("buffer")
buffersink := astiav.FindFilterByName("buffersink")
args := astiav.FilterArgs{
"video_size": fmt.Sprintf("%dx%d", v.decCodecContext.Width(), v.decCodecContext.Height()),
"pix_fmt": strconv.Itoa(int(v.decCodecContext.PixelFormat())),
"pixel_aspect": v.decCodecContext.SampleAspectRatio().String(),
"time_base": "1/1000",
}
v.buffersrcContext, err = v.filterGraph.NewFilterContext(buffersrc, "in", args)
if err != nil {
return nil, fmt.Errorf("failed to create buffersrc context: %v", err)
}
v.buffersinkContext, err = v.filterGraph.NewFilterContext(buffersink, "out", nil)
if err != nil {
return nil, fmt.Errorf("failed to create buffersink context: %v", err)
}
outputs := []*astiav.FilterInOut{
astiav.NewFilterInOut().SetContext(v.buffersrcContext).SetPadIdx(0).SetNext(nil),
}
inputs := []*astiav.FilterInOut{
astiav.NewFilterInOut().SetContext(v.buffersinkContext).SetPadIdx(0).SetNext(nil),
}
if err := v.filterGraph.Parse(inputs, outputs); err != nil {
return nil, fmt.Errorf("failed to parse filter graph: %v", err)
}
if err := v.filterGraph.Configure(); err != nil {
return nil, fmt.Errorf("failed to configure filter graph: %v", err)
}
}
if err := v.buffersrcContext.AddFrame(frame); err != nil {
return nil, fmt.Errorf("failed to add frame to filter graph: %v", err)
}
for {
err := v.buffersinkContext.GetFrame(v.softwareFrame)
if err != nil {
if errors.Is(err, astiav.ErrEagain) || errors.Is(err, astiav.ErrEof) {
break
}
return nil, fmt.Errorf("failed to get frame from filter graph: %v", err)
}
if v.swsContext == nil {
v.swsContext, err = astiav.CreateSoftwareScaleContext(
v.softwareFrame.Width(), v.softwareFrame.Height(), v.softwareFrame.PixelFormat(),
v.softwareFrame.Width(), v.softwareFrame.Height(), astiav.PixelFormatRgba,
astiav.NewSoftwareScaleContextFlags(astiav.SoftwareScaleContextFlagBilinear),
)
if err != nil {
return nil, fmt.Errorf("failed to create software scaling context: %v", err)
}
}
if err := v.swsContext.ScaleFrame(v.softwareFrame, v.rgbaFrame); err != nil {
return nil, fmt.Errorf("failed to scale frame: %v", err)
}
img, err := v.rgbaFrame.Data().GuessImageFormat()
if err != nil {
return nil, fmt.Errorf("failed to guess image format: %v", err)
}
if err := v.rgbaFrame.Data().ToImage(img); err != nil {
return nil, fmt.Errorf("failed to convert frame to image: %v", err)
}
return img, nil
}
}
}
func findHardwareDecoder(codecID astiav.CodecID) *astiav.Codec {
decoder := astiav.FindDecoder(codecID)
if decoder == nil {
return nil
}
hwConfigs := decoder.HardwareConfigs()
if len(hwConfigs) > 0 {
return decoder
}
for _, codec := range astiav.Codecs() {
if codec.ID() == codecID && codec.IsDecoder() {
hwConfigs = codec.HardwareConfigs()
if len(hwConfigs) > 0 {
return codec
}
}
}
return nil
}