talariadb/talaria

Use sync.Pool to reduce the number of memory allocations on encode

Opened this issue · 1 comment

In this encoding part, we have too many allocations:

e.batch = &pb.Batch{Events: make([]*pb.Event, 0, len(events))}

We can optimise it like this (just an example):

// encoder serializes a slice of events into a *pb.Batch.
// It keeps a pool of batches so Encode can recycle the Events slice
// backing storage instead of allocating a fresh one on every call.
type encoder struct {
    // ... other fields ...
    batchPool *sync.Pool // recycled *pb.Batch values, each pre-sized to batchSize
}

// newEncoder constructs an encoder whose batch pool hands out
// *pb.Batch values with a pre-sized Events slice, so Encode can
// reuse storage rather than allocating a new batch per call.
func newEncoder() *encoder {
    return &encoder{
        batchPool: &sync.Pool{
            New: func() interface{} {
                return &pb.Batch{Events: make([]*pb.Event, 0, batchSize)}
            },
        },
    }
}

// Encode implements formatter interface.
//
// The returned *pb.Batch is backed by pooled storage: ownership passes to
// the caller, who must return it to the pool only after the batch is fully
// consumed (e.g. via a separate Release method). Putting it back inside
// Encode — before the caller has read it — would let the next Get reuse
// and overwrite the Events slice the caller is still holding, corrupting
// the returned batch.
func (e *encoder) Encode(events []Event) *pb.Batch {
    e.Lock()
    defer e.Unlock()

    e.next = 0
    e.dictionary = make(map[string]uint32, len(events))

    // Grab a recycled batch and truncate its Events slice, keeping capacity.
    e.batch = e.batchPool.Get().(*pb.Batch)
    e.batch.Events = e.batch.Events[:0]

    // Write the events
    for _, ev := range events {
        encoded := e.encodeEvent(ev)
        e.batch.Events = append(e.batch.Events, encoded)
    }

    // Write the interned strings
    e.writeDictionary()

    // NOTE: deliberately no e.batchPool.Put(e.batch) here — the batch is
    // still in use by the caller; release it only once it is no longer needed.
    return e.batch
}

You should only release the batch back to the pool once it's no longer in use; in this example, `Put` is called before the returned batch has been consumed, so a subsequent `Get` can hand the same batch to another caller and you'll end up with corrupted memory.