asticode/go-astiav

h264 -> h265

mateothegreat opened this issue · 2 comments

Hey there, I want to start off by saying thank you for your hard work, none of this is easy! 💪

I need to go from RTSP (h264) --> h265 and I think I'm close but I'm stumped a error encoding packet: Invalid argument error.

Basically I'm coming from an rtp.Packet from https://github.com/bluenviron/gortsplib/v4 which is h264.

Can I get some pointers on to what I may be missing or how to debug this error (it's super vague).

ffprobe output:

Input #0, rtsp, from 'rtsp://***:***@108.53.***.****12200/Streaming/Channels/101':
  Metadata:
    title           : Media Presentation
  Duration: N/A, start: 0.100000, bitrate: N/A
  Stream #0:0: Video: h264 (Main), yuvj420p(pc, bt709, progressive), 3840x2160 [SAR 1:1 DAR 16:9], 20 fps, 10 tbr, 90k tbn

Work so far:

package clients

import (
	"errors"
	"fmt"
	"log"
	"strings"

	"github.com/asticode/go-astiav"
	"github.com/bluenviron/gortsplib/v4/pkg/format"
	"github.com/nvr-ai/go-rtsp/streams"
)

type H264Client struct {
	DecoderCodec   *astiav.Codec
	DecoderContext *astiav.CodecContext
	EncoderCodec   *astiav.Codec
	EncoderContext *astiav.CodecContext
}

func NewH264Client() (*H264Client, error) {
	astiav.SetLogLevel(astiav.LogLevelVerbose)
	astiav.SetLogCallback(func(l astiav.LogLevel, fmt, msg, parent string) {
		log.Printf("ffmpeg log: %s (level: %d)\n", strings.TrimSpace(msg), l)
	})

	client := &H264Client{}

	// Get the H.264 decoder codec
	client.DecoderCodec = astiav.FindDecoder(astiav.CodecIDH264)
	if client.DecoderCodec == nil {
		return nil, errors.New("H.264 decoder not found")
	}

	// Allocate the decoder context
	client.DecoderContext = astiav.AllocCodecContext(client.DecoderCodec)
	if client.DecoderContext == nil {
		return nil, errors.New("could not allocate decoder context")
	}

	// Open the decoder
	if err := client.DecoderContext.Open(client.DecoderCodec, nil); err != nil {
		return nil, fmt.Errorf("error opening decoder: %w", err)
	}

	// Get the H.265 encoder codec
	client.EncoderCodec = astiav.FindEncoder(astiav.CodecIDHevc)
	if client.EncoderCodec == nil {
		return nil, errors.New("H.265 encoder not found")
	}

	// Allocate the encoder context
	client.EncoderContext = astiav.AllocCodecContext(client.EncoderCodec)
	if client.EncoderContext == nil {
		return nil, errors.New("could not allocate encoder context")
	}

	// Set encoder parameters
	client.EncoderContext.SetTimeBase(astiav.NewRational(1, 25))
	client.EncoderContext.SetBitRate(1000000)
	client.EncoderContext.SetWidth(1280) // Set width of the output frame
	client.EncoderContext.SetHeight(720) // Set height of the output frame
	client.EncoderContext.SetPixelFormat(astiav.PixelFormat(astiav.PixelFormatYuv420P))

	// Open the encoder
	if err := client.EncoderContext.Open(client.EncoderCodec, nil); err != nil {
		return nil, fmt.Errorf("error opening encoder: %w", err)
	}

	return client, nil
}

func (c *H264Client) DecodeVideoFrame(rtspPacket *streams.Frame) error {
	// Parse H.264 NAL units from the RTP payload
	nalUnits, err := parseRTPH264NALUnits(rtspPacket.Packet.Payload)
	if err != nil {
		return fmt.Errorf("error parsing H.264 NAL units: %w", err)
	}

	// Encode each H.264 NAL unit to H.265
	for _, nalUnit := range nalUnits {
		// Convert H.264 NAL unit to H.265 frame with start code, SPS, and PPS
		h265Frame, err := h264ToH265(rtspPacket.Media.Formats[0].(*format.H264).PPS, rtspPacket.Media.Formats[0].(*format.H264).SPS, nalUnit)
		if err != nil {
			return fmt.Errorf("error converting H.264 to H.265: %w", err)
		}

		// Allocate a packet for encoding
		packet := astiav.AllocPacket()

		// Set packet data
		packet.FromData(h265Frame)

		// Encode the packet
		if err := c.EncoderContext.SendPacket(packet); err != nil {
			return fmt.Errorf("error encoding packet: %w", err)
		}
	}

	return nil
}

func h264ToH265(pps []byte, sps []byte, h264Frame []byte) ([]byte, error) {
	// Add start code prefix if necessary
	if len(h264Frame) >= 4 && h264Frame[0] == 0x00 && h264Frame[1] == 0x00 && h264Frame[2] == 0x00 && h264Frame[3] == 0x01 {
		// Start code prefix already exists
		return h264Frame, nil
	}

	// Create H.265 frame with start code prefix and SPS/PPS
	h265Frame := append([]byte{0x00, 0x00, 0x00, 0x01}, sps...)
	h265Frame = append(h265Frame, []byte{0x00, 0x00, 0x00, 0x01}...)
	h265Frame = append(h265Frame, pps...)
	h265Frame = append(h265Frame, []byte{0x00, 0x00, 0x00, 0x01}...)
	h265Frame = append(h265Frame, h264Frame...)

	return h265Frame, nil
}

// parseRTPH264NALUnits parses H264 NAL units from an RTP packet payload.
func parseRTPH264NALUnits(payload []byte) ([][]byte, error) {
	var nalUnits [][]byte

	// Extract other NAL units from the payload
	for i := 0; i < len(payload); {
		// Find start code prefix (0x000001 or 0x00000001)
		start := i
		for i < len(payload)-3 && !(payload[i] == 0 && payload[i+1] == 0 && (payload[i+2] == 1 || (payload[i+2] == 0 && payload[i+3] == 1))) {
			i++
		}

		// Append the NAL unit to the list of NAL units
		nalUnit := payload[start:i]
		if len(nalUnit) > 0 {
			nalUnits = append(nalUnits, nalUnit)
		}

		// Move to the next NAL unit
		if i < len(payload)-3 && payload[i+2] == 0 && payload[i+3] == 1 {
			i += 4 // 0x00000001 prefix
		} else if i < len(payload)-2 && payload[i+2] == 1 {
			i += 3 // 0x000001 prefix
		} else {
			i++
		}
	}

	return nalUnits, nil
}

Thanks again, we appreciate ya!

Looking at your goals, I'd strongly suggest that you take a look at the transcoding example (you can skip the filter part since you don't need it) since that's basically what you're trying to do.

Also, I'd strongly consider replacing gortsplib with demuxing the stream with go-astiav directly, if possible, that would simplify things greatly (but that's not mandatory).

Regarding your code, there seems to be a misunderstanding: data is coming from rtsp as h264 packets (compressed) therefore you first need to decode them (transform them to frames) and encode them using an h265 encoder. Again, understanding the transcoding example is a must in your situation and particularly the decoding part as well as the encoding part.

Ok, roger that.. I'll give it another go. I think I just got lost in the weeds.

Thank you for the quick response @asticode !!