h264 -> h265
mateothegreat opened this issue · 2 comments
Hey there, I want to start off by saying thank you for your hard work, none of this is easy! 💪
I need to go from RTSP (h264) --> h265 and I think I'm close but I'm stumped a error encoding packet: Invalid argument
error.
Basically I'm coming from an rtp.Packet
from https://github.com/bluenviron/gortsplib/v4 which is h264.
Can I get some pointers on to what I may be missing or how to debug this error (it's super vague).
ffprobe output:
Input #0, rtsp, from 'rtsp://***:***@108.53.***.****12200/Streaming/Channels/101':
Metadata:
title : Media Presentation
Duration: N/A, start: 0.100000, bitrate: N/A
Stream #0:0: Video: h264 (Main), yuvj420p(pc, bt709, progressive), 3840x2160 [SAR 1:1 DAR 16:9], 20 fps, 10 tbr, 90k tbn
Work so far:
package clients
import (
"errors"
"fmt"
"log"
"strings"
"github.com/asticode/go-astiav"
"github.com/bluenviron/gortsplib/v4/pkg/format"
"github.com/nvr-ai/go-rtsp/streams"
)
type H264Client struct {
DecoderCodec *astiav.Codec
DecoderContext *astiav.CodecContext
EncoderCodec *astiav.Codec
EncoderContext *astiav.CodecContext
}
func NewH264Client() (*H264Client, error) {
astiav.SetLogLevel(astiav.LogLevelVerbose)
astiav.SetLogCallback(func(l astiav.LogLevel, fmt, msg, parent string) {
log.Printf("ffmpeg log: %s (level: %d)\n", strings.TrimSpace(msg), l)
})
client := &H264Client{}
// Get the H.264 decoder codec
client.DecoderCodec = astiav.FindDecoder(astiav.CodecIDH264)
if client.DecoderCodec == nil {
return nil, errors.New("H.264 decoder not found")
}
// Allocate the decoder context
client.DecoderContext = astiav.AllocCodecContext(client.DecoderCodec)
if client.DecoderContext == nil {
return nil, errors.New("could not allocate decoder context")
}
// Open the decoder
if err := client.DecoderContext.Open(client.DecoderCodec, nil); err != nil {
return nil, fmt.Errorf("error opening decoder: %w", err)
}
// Get the H.265 encoder codec
client.EncoderCodec = astiav.FindEncoder(astiav.CodecIDHevc)
if client.EncoderCodec == nil {
return nil, errors.New("H.265 encoder not found")
}
// Allocate the encoder context
client.EncoderContext = astiav.AllocCodecContext(client.EncoderCodec)
if client.EncoderContext == nil {
return nil, errors.New("could not allocate encoder context")
}
// Set encoder parameters
client.EncoderContext.SetTimeBase(astiav.NewRational(1, 25))
client.EncoderContext.SetBitRate(1000000)
client.EncoderContext.SetWidth(1280) // Set width of the output frame
client.EncoderContext.SetHeight(720) // Set height of the output frame
client.EncoderContext.SetPixelFormat(astiav.PixelFormat(astiav.PixelFormatYuv420P))
// Open the encoder
if err := client.EncoderContext.Open(client.EncoderCodec, nil); err != nil {
return nil, fmt.Errorf("error opening encoder: %w", err)
}
return client, nil
}
func (c *H264Client) DecodeVideoFrame(rtspPacket *streams.Frame) error {
// Parse H.264 NAL units from the RTP payload
nalUnits, err := parseRTPH264NALUnits(rtspPacket.Packet.Payload)
if err != nil {
return fmt.Errorf("error parsing H.264 NAL units: %w", err)
}
// Encode each H.264 NAL unit to H.265
for _, nalUnit := range nalUnits {
// Convert H.264 NAL unit to H.265 frame with start code, SPS, and PPS
h265Frame, err := h264ToH265(rtspPacket.Media.Formats[0].(*format.H264).PPS, rtspPacket.Media.Formats[0].(*format.H264).SPS, nalUnit)
if err != nil {
return fmt.Errorf("error converting H.264 to H.265: %w", err)
}
// Allocate a packet for encoding
packet := astiav.AllocPacket()
// Set packet data
packet.FromData(h265Frame)
// Encode the packet
if err := c.EncoderContext.SendPacket(packet); err != nil {
return fmt.Errorf("error encoding packet: %w", err)
}
}
return nil
}
func h264ToH265(pps []byte, sps []byte, h264Frame []byte) ([]byte, error) {
// Add start code prefix if necessary
if len(h264Frame) >= 4 && h264Frame[0] == 0x00 && h264Frame[1] == 0x00 && h264Frame[2] == 0x00 && h264Frame[3] == 0x01 {
// Start code prefix already exists
return h264Frame, nil
}
// Create H.265 frame with start code prefix and SPS/PPS
h265Frame := append([]byte{0x00, 0x00, 0x00, 0x01}, sps...)
h265Frame = append(h265Frame, []byte{0x00, 0x00, 0x00, 0x01}...)
h265Frame = append(h265Frame, pps...)
h265Frame = append(h265Frame, []byte{0x00, 0x00, 0x00, 0x01}...)
h265Frame = append(h265Frame, h264Frame...)
return h265Frame, nil
}
// parseRTPH264NALUnits parses H264 NAL units from an RTP packet payload.
func parseRTPH264NALUnits(payload []byte) ([][]byte, error) {
var nalUnits [][]byte
// Extract other NAL units from the payload
for i := 0; i < len(payload); {
// Find start code prefix (0x000001 or 0x00000001)
start := i
for i < len(payload)-3 && !(payload[i] == 0 && payload[i+1] == 0 && (payload[i+2] == 1 || (payload[i+2] == 0 && payload[i+3] == 1))) {
i++
}
// Append the NAL unit to the list of NAL units
nalUnit := payload[start:i]
if len(nalUnit) > 0 {
nalUnits = append(nalUnits, nalUnit)
}
// Move to the next NAL unit
if i < len(payload)-3 && payload[i+2] == 0 && payload[i+3] == 1 {
i += 4 // 0x00000001 prefix
} else if i < len(payload)-2 && payload[i+2] == 1 {
i += 3 // 0x000001 prefix
} else {
i++
}
}
return nalUnits, nil
}
Thanks again, we appreciate ya!
Looking at your goals, I'd strongly suggest that you take a look at the transcoding example (you can skip the filter part since you don't need it) since that's basically what you're trying to do.
Also, I'd strongly consider replacing gortsplib
with demuxing the stream with go-astiav
directly, if possible, that would simplify things greatly (but that's not mandatory).
Regarding your code, there seems to be a misunderstanding: data is coming from rtsp as h264 packets (compressed) therefore you first need to decode them (transform them to frames) and encode them using an h265 encoder. Again, understanding the transcoding example is a must in your situation and particularly the decoding part as well as the encoding part.
Ok, roger that.. I'll give it another go. I think I just got lost in the weeds.
Thank you for the quick response @asticode !!