asticode/go-astiav

Request for Help on Using Hardware Transcoding

Closed this issue · 0 comments

As a Go beginner, this issue has troubled me for three days. I have tried using both SoftwareScaleContext and Filters to convert frames into images. Despite my efforts to understand the code in the demo, I still haven't achieved the result I want. Currently, I can successfully transcode using SoftwareScaleContext, but as soon as I do, the memory usage keeps increasing, and I can't figure out which part of the code is causing the problem. When trying the Filters approach, I've spent the whole day without making any progress. Can someone help me fix the code below so that both methods work properly?

I'm currently using the go-astiav version from the n6.1.1 branch. As for why I'm not using the latest version, it's because I'm working with ffmpeg-rockchip, and the latest 7.0 branch of ffmpeg-rockchip is not compatible with go-astiav.

package main

import (
	"errors"
	"fmt"
	"image"
	"strconv"

	"github.com/asticode/go-astiav"
	log "github.com/darkit/slog"
	"github.com/deepch/vdk/av"
	"github.com/deepch/vdk/codec/h264parser"
	"github.com/deepch/vdk/codec/h265parser"
)

// VideoDecoder bundles the go-astiav objects needed to decode H.264/H.265
// packets and convert the resulting frames into Go images. Instances are
// created by NewVideoDecoder; the frames and the packet are reused across
// decode calls and unreferenced at the end of each call.
type VideoDecoder struct {
	// fps, width and height are copied from the stream's codec data.
	fps                   int
	width                 int
	height                int
	// decCodec is the decoder selected by NewVideoDecoder and decCodecContext
	// is the context opened for it.
	decCodec              *astiav.Codec
	decCodecContext       *astiav.CodecContext
	// hardwareDeviceContext is only set when hardware decoding was requested
	// and the device context could be created.
	hardwareDeviceContext *astiav.HardwareDeviceContext
	// hardwarePixelFormat is the pixel format taken from the decoder's matching
	// hardware config; decoded frames in this format are transferred to
	// softwareFrame before conversion.
	hardwarePixelFormat   astiav.PixelFormat
	// softwareFrame receives data transferred out of a hardware frame (and, in
	// the filter path, the filter graph output).
	softwareFrame         *astiav.Frame
	// hardwareFrame receives frames straight from the decoder.
	hardwareFrame         *astiav.Frame
	// rgbaFrame is the destination of the software scaler.
	rgbaFrame             *astiav.Frame
	// avPkt is the packet the incoming payload is loaded into before it is
	// sent to the decoder.
	avPkt                 *astiav.Packet
	// swsContext is created lazily from the first frame that needs conversion
	// to RGBA.
	swsContext            *astiav.SoftwareScaleContext
	// filterGraph and its source/sink contexts are created lazily by
	// DecodePacketWithFilter.
	filterGraph           *astiav.FilterGraph
	buffersrcContext      *astiav.FilterContext
	buffersinkContext     *astiav.FilterContext
}

// NewVideoDecoder creates and opens a decoder for the H.264 or H.265 stream
// described by stream.
//
// When useHardware is true the decoder is chosen with findHardwareDecoder and
// an "rkmpp" hardware device context is attached if one can be created; if the
// device context cannot be created the failure is logged and the decoder is
// opened without it. When useHardware is false the default decoder for the
// codec is used.
//
// It returns an error for unsupported codec types, when no decoder is found,
// when the codec context cannot be allocated, or when the decoder fails to
// open. Native resources allocated before a failed open are released.
func NewVideoDecoder(stream av.CodecData, useHardware bool) (*VideoDecoder, error) {
	var (
		codecID   astiav.CodecID
		extraData []byte
		fps       int
		width     int
		height    int
	)

	astiav.SetLogLevel(astiav.LogLevelDebug)

	switch stream.Type() {
	case av.H264:
		h264, ok := stream.(h264parser.CodecData)
		if !ok {
			return nil, fmt.Errorf("unexpected H264 codec data type: %T", stream)
		}
		codecID = astiav.CodecIDH264
		fps = h264.FPS()
		extraData = h264.AVCDecoderConfRecordBytes()
		width, height = h264.Width(), h264.Height()
	case av.H265:
		h265, ok := stream.(h265parser.CodecData)
		if !ok {
			return nil, fmt.Errorf("unexpected H265 codec data type: %T", stream)
		}
		codecID = astiav.CodecIDH265
		fps = h265.FPS()
		extraData = h265.AVCDecoderConfRecordBytes()
		width, height = h265.Width(), h265.Height()
	default:
		return nil, fmt.Errorf("unsupported codec type: %v", stream.Type())
	}

	v := &VideoDecoder{
		fps:    fps,
		width:  width,
		height: height,
		// The zero value of a pixel format is a regular software format, so
		// start from "none"; otherwise software-decoded frames could compare
		// equal to hardwarePixelFormat and be sent through the hardware
		// transfer path by the decode methods.
		hardwarePixelFormat: astiav.PixelFormatNone,
	}

	// Initialize decoder
	if useHardware {
		v.decCodec = findHardwareDecoder(codecID)
	} else {
		v.decCodec = astiav.FindDecoder(codecID)
	}
	if v.decCodec == nil {
		return nil, fmt.Errorf("decoder not found: %v", codecID)
	}
	if useHardware {
		log.Infof("Found hardware-accelerated decoder: %v", codecID.String())
	} else {
		log.Infof("Found software decoder: %v", codecID.String())
	}

	v.decCodecContext = astiav.AllocCodecContext(v.decCodec)
	if v.decCodecContext == nil {
		return nil, fmt.Errorf("unable to allocate decoder context")
	}

	v.decCodecContext.SetWidth(width)
	v.decCodecContext.SetHeight(height)
	v.decCodecContext.SetExtraData(extraData)

	if useHardware {
		// Initialize hardware decoding
		hardwareDeviceType := astiav.FindHardwareDeviceTypeByName("rkmpp")
		//hardwareDeviceType := astiav.FindHardwareDeviceTypeByName(v.decCodec.String())
		for _, p := range v.decCodec.HardwareConfigs() {
			if p.MethodFlags().Has(astiav.CodecHardwareConfigMethodFlagHwDeviceCtx) && p.HardwareDeviceType() == hardwareDeviceType {
				v.hardwarePixelFormat = p.PixelFormat()
				break
			}
		}

		hdc, err := astiav.CreateHardwareDeviceContext(hardwareDeviceType, "0", nil)
		if err != nil {
			// Best effort: keep going without a device context, but make the
			// reason visible instead of dropping it.
			log.Infof("hardware device context unavailable, continuing without it: %v", err)
		}
		v.hardwareDeviceContext = hdc
		if v.hardwareDeviceContext != nil {
			v.decCodecContext.SetHardwareDeviceContext(v.hardwareDeviceContext)
			// Only steer format negotiation when a matching hardware config
			// was found; otherwise the decoder's default selection is left in
			// place.
			if v.hardwarePixelFormat != astiav.PixelFormatNone {
				wanted := v.hardwarePixelFormat
				v.decCodecContext.SetPixelFormatCallback(func(pfs []astiav.PixelFormat) astiav.PixelFormat {
					log.Infof("list: %v", pfs)
					for _, pf := range pfs {
						if pf == wanted {
							return pf
						}
					}
					return astiav.PixelFormatNone
				})
			}
		}

		log.Info("Video codec info", "stream", stream.Type().String(), "decoder", v.decCodec.String(), "pixel", v.hardwarePixelFormat.String())
	}

	if err := v.decCodecContext.Open(v.decCodec, nil); err != nil {
		// Nothing is returned to the caller on failure, so release what was
		// allocated above.
		v.decCodecContext.Free()
		if v.hardwareDeviceContext != nil {
			v.hardwareDeviceContext.Free()
		}
		return nil, fmt.Errorf("failed to open decoder: %w", err)
	}

	v.softwareFrame = astiav.AllocFrame()
	v.hardwareFrame = astiav.AllocFrame()
	v.rgbaFrame = astiav.AllocFrame()
	v.avPkt = astiav.AllocPacket()

	return v, nil
}

// DecodePacketWithSoftwareScaleContext feeds one packet to the decoder and, if
// a frame is available, converts it to RGBA with a software scale context and
// returns it as an image.
//
// It returns (nil, nil) when the packet carries no payload or when the decoder
// has no frame ready yet (EAGAIN/EOF). At most one frame is returned per call;
// any further frames the decoder has buffered are only picked up by later
// calls.
//
// The scale context is created from the first converted frame and reused
// afterwards, so it assumes the frame size and pixel format stay constant for
// the lifetime of the decoder.
func (v *VideoDecoder) DecodePacketWithSoftwareScaleContext(pkt *av.Packet) (image.Image, error) {
	if pkt == nil {
		return nil, errors.New("nil packet")
	}
	// A packet without data is treated by the decoder as a flush request,
	// which would switch it into draining mode; skip it instead.
	if len(pkt.Data) == 0 {
		return nil, nil
	}

	defer v.hardwareFrame.Unref()
	defer v.softwareFrame.Unref()
	defer v.rgbaFrame.Unref()
	defer v.avPkt.Unref()

	if err := v.avPkt.FromData(pkt.Data); err != nil {
		return nil, fmt.Errorf("failed to load packet data: %w", err)
	}

	if err := v.decCodecContext.SendPacket(v.avPkt); err != nil {
		return nil, fmt.Errorf("failed to send packet to decoder: %w", err)
	}

	if err := v.decCodecContext.ReceiveFrame(v.hardwareFrame); err != nil {
		if errors.Is(err, astiav.ErrEagain) || errors.Is(err, astiav.ErrEof) {
			return nil, nil
		}
		return nil, fmt.Errorf("failed to receive frame: %w", err)
	}

	// Frames in the hardware pixel format must be copied to system memory
	// before the scaler can read them; anything else is used as is.
	frame := v.hardwareFrame
	if v.hardwareFrame.PixelFormat() == v.hardwarePixelFormat {
		if err := v.hardwareFrame.TransferHardwareData(v.softwareFrame); err != nil {
			return nil, fmt.Errorf("failed to transfer hardware frame to software frame: %w", err)
		}
		v.softwareFrame.SetPts(v.hardwareFrame.Pts())
		frame = v.softwareFrame
	}

	if v.swsContext == nil {
		sws, err := astiav.CreateSoftwareScaleContext(
			frame.Width(), frame.Height(), frame.PixelFormat(),
			frame.Width(), frame.Height(), astiav.PixelFormatRgba,
			astiav.NewSoftwareScaleContextFlags(astiav.SoftwareScaleContextFlagBilinear),
		)
		if err != nil {
			return nil, fmt.Errorf("failed to create software scaling context: %w", err)
		}
		v.swsContext = sws
	}

	if err := v.swsContext.ScaleFrame(frame, v.rgbaFrame); err != nil {
		return nil, fmt.Errorf("failed to scale frame: %w", err)
	}

	img, err := v.rgbaFrame.Data().GuessImageFormat()
	if err != nil {
		return nil, fmt.Errorf("failed to guess image format: %w", err)
	}

	if err := v.rgbaFrame.Data().ToImage(img); err != nil {
		return nil, fmt.Errorf("failed to convert frame to image: %w", err)
	}

	return img, nil
}

// DecodePacketWithFilter feeds one packet to the decoder, pushes each decoded
// frame through a buffer -> buffersink filter graph and converts the first
// frame that comes out of the graph to an RGBA image.
//
// It returns (nil, nil) when the packet carries no payload or when the decoder
// runs out of frames (EAGAIN/EOF) before the graph produced any output.
//
// The filter graph and the scale context are created lazily from the first
// frame and reused, so they assume the frame size and pixel format stay
// constant for the lifetime of the decoder.
//
// NOTE(review): the filter API calls (NewFilterInOut, Parse, AddFrame,
// GetFrame) are kept exactly as originally written; confirm they match the
// go-astiav version in use.
func (v *VideoDecoder) DecodePacketWithFilter(pkt *av.Packet) (image.Image, error) {
	if pkt == nil {
		return nil, errors.New("nil packet")
	}
	// A packet without data is treated by the decoder as a flush request,
	// which would switch it into draining mode; skip it instead.
	if len(pkt.Data) == 0 {
		return nil, nil
	}

	defer v.hardwareFrame.Unref()
	defer v.softwareFrame.Unref()
	defer v.rgbaFrame.Unref()
	defer v.avPkt.Unref()

	if err := v.avPkt.FromData(pkt.Data); err != nil {
		return nil, fmt.Errorf("failed to load packet data: %w", err)
	}

	if err := v.decCodecContext.SendPacket(v.avPkt); err != nil {
		return nil, fmt.Errorf("failed to send packet to decoder: %w", err)
	}

	for {
		if err := v.decCodecContext.ReceiveFrame(v.hardwareFrame); err != nil {
			if errors.Is(err, astiav.ErrEagain) || errors.Is(err, astiav.ErrEof) {
				return nil, nil
			}
			return nil, fmt.Errorf("failed to receive frame: %w", err)
		}

		// Frames in the hardware pixel format must be copied to system memory
		// before they can be filtered; anything else is used as is.
		frame := v.hardwareFrame
		if v.hardwareFrame.PixelFormat() == v.hardwarePixelFormat {
			if err := v.hardwareFrame.TransferHardwareData(v.softwareFrame); err != nil {
				return nil, fmt.Errorf("failed to transfer hardware frame to software frame: %w", err)
			}
			v.softwareFrame.SetPts(v.hardwareFrame.Pts())
			frame = v.softwareFrame
		}

		if v.filterGraph == nil {
			if err := v.initFilterGraph(frame); err != nil {
				return nil, err
			}
		}

		if err := v.buffersrcContext.AddFrame(frame); err != nil {
			return nil, fmt.Errorf("failed to add frame to filter graph: %w", err)
		}

		// softwareFrame doubles as the sink's output frame; drop whatever it
		// may still reference so the sink fills a clean frame.
		v.softwareFrame.Unref()

		if err := v.buffersinkContext.GetFrame(v.softwareFrame); err != nil {
			if errors.Is(err, astiav.ErrEagain) || errors.Is(err, astiav.ErrEof) {
				// The graph needs more input: go back to the decoder.
				continue
			}
			return nil, fmt.Errorf("failed to get frame from filter graph: %w", err)
		}

		if v.swsContext == nil {
			sws, err := astiav.CreateSoftwareScaleContext(
				v.softwareFrame.Width(), v.softwareFrame.Height(), v.softwareFrame.PixelFormat(),
				v.softwareFrame.Width(), v.softwareFrame.Height(), astiav.PixelFormatRgba,
				astiav.NewSoftwareScaleContextFlags(astiav.SoftwareScaleContextFlagBilinear),
			)
			if err != nil {
				return nil, fmt.Errorf("failed to create software scaling context: %w", err)
			}
			v.swsContext = sws
		}

		if err := v.swsContext.ScaleFrame(v.softwareFrame, v.rgbaFrame); err != nil {
			return nil, fmt.Errorf("failed to scale frame: %w", err)
		}

		img, err := v.rgbaFrame.Data().GuessImageFormat()
		if err != nil {
			return nil, fmt.Errorf("failed to guess image format: %w", err)
		}

		if err := v.rgbaFrame.Data().ToImage(img); err != nil {
			return nil, fmt.Errorf("failed to convert frame to image: %w", err)
		}

		return img, nil
	}
}

// initFilterGraph builds the buffer -> buffersink graph for frames shaped like
// frame and stores it on v only once it is fully configured; on any failure
// the partially built graph is freed and v is left untouched so a later call
// can retry.
func (v *VideoDecoder) initFilterGraph(frame *astiav.Frame) (err error) {
	graph := astiav.AllocFilterGraph()
	if graph == nil {
		return errors.New("failed to allocate filter graph")
	}
	defer func() {
		if err != nil {
			graph.Free()
		}
	}()

	buffersrc := astiav.FindFilterByName("buffer")
	if buffersrc == nil {
		return errors.New("filter not found: buffer")
	}
	buffersink := astiav.FindFilterByName("buffersink")
	if buffersink == nil {
		return errors.New("filter not found: buffersink")
	}

	// Describe the frames that are actually pushed into the graph. After a
	// hardware transfer the frame's pixel format differs from the one the
	// codec context reports, so the frame itself is the source of truth.
	args := astiav.FilterArgs{
		"video_size":   fmt.Sprintf("%dx%d", frame.Width(), frame.Height()),
		"pix_fmt":      strconv.Itoa(int(frame.PixelFormat())),
		"pixel_aspect": v.decCodecContext.SampleAspectRatio().String(),
		"time_base":    "1/1000",
	}

	srcCtx, err := graph.NewFilterContext(buffersrc, "in", args)
	if err != nil {
		return fmt.Errorf("failed to create buffersrc context: %w", err)
	}

	sinkCtx, err := graph.NewFilterContext(buffersink, "out", nil)
	if err != nil {
		return fmt.Errorf("failed to create buffersink context: %w", err)
	}

	outputs := []*astiav.FilterInOut{
		astiav.NewFilterInOut().SetContext(srcCtx).SetPadIdx(0).SetNext(nil),
	}
	inputs := []*astiav.FilterInOut{
		astiav.NewFilterInOut().SetContext(sinkCtx).SetPadIdx(0).SetNext(nil),
	}

	if err = graph.Parse(inputs, outputs); err != nil {
		return fmt.Errorf("failed to parse filter graph: %w", err)
	}
	if err = graph.Configure(); err != nil {
		return fmt.Errorf("failed to configure filter graph: %w", err)
	}

	v.filterGraph = graph
	v.buffersrcContext = srcCtx
	v.buffersinkContext = sinkCtx
	return nil
}

// findHardwareDecoder returns a decoder for codecID that reports at least one
// hardware configuration. The decoder returned by astiav.FindDecoder is
// preferred when it qualifies; otherwise every registered codec is scanned for
// a decoder with the same ID that has hardware configurations. It returns nil
// when FindDecoder finds nothing or when no candidate has any hardware
// configuration.
func findHardwareDecoder(codecID astiav.CodecID) *astiav.Codec {
	preferred := astiav.FindDecoder(codecID)
	switch {
	case preferred == nil:
		return nil
	case len(preferred.HardwareConfigs()) > 0:
		return preferred
	}

	// The default decoder has no hardware support; look for an alternative
	// decoder implementation of the same codec.
	for _, candidate := range astiav.Codecs() {
		if candidate.ID() != codecID || !candidate.IsDecoder() {
			continue
		}
		if len(candidate.HardwareConfigs()) == 0 {
			continue
		}
		return candidate
	}

	return nil
}