klauspost/compress

Flate: when the same content is compressed over and over again, why doesn't the output shrink to a particularly small size?

lxzan opened this issue · 5 comments

Thank you for your work.
I'm implementing the WebSocket compression extension (RFC 7692, permessage-deflate), and I'm confused by the behavior shown below.

github.com/klauspost/compress v1.17.4

package main

import (
	"bytes"
	"github.com/klauspost/compress/flate"
	"math/rand"
	"sync"
	"time"
)

// BinaryPow returns 2 raised to the power n.
//
// A non-positive n yields 1. Exponents at or beyond the bit width of int
// wrap around exactly as repeated doubling would.
func BinaryPow(n int) int {
	if n <= 0 {
		return 1
	}
	// A left shift is the constant-time equivalent of doubling n times.
	return 1 << uint(n)
}

// slideWindow retains the most recently written bytes, up to a fixed size,
// discarding the oldest ones once it is full.
type slideWindow struct {
	enabled bool   // Write is a no-op while this is false
	dict    []byte // retained bytes, oldest first
	size    int    // maximum number of bytes kept in dict
}

// initialize enables the window and sizes it to hold 2^windowBits bytes.
// It returns the receiver so that the call can be chained.
func (c *slideWindow) initialize(windowBits int) *slideWindow {
	capacity := BinaryPow(windowBits)
	c.enabled, c.size = true, capacity
	c.dict = make([]byte, 0, capacity)
	return c
}

// Write adds p to the window. While there is room the bytes are simply
// appended; once the window is full the oldest bytes are dropped so that
// dict ends with the most recently written data. It does nothing when the
// window has not been enabled.
func (c *slideWindow) Write(p []byte) {
	if !c.enabled {
		return
	}

	free := c.size - len(c.dict)
	if len(p) <= free {
		// Everything fits in the remaining space.
		c.dict = append(c.dict, p...)
		return
	}

	// Top the window up to exactly size bytes, then handle the overflow.
	c.dict = append(c.dict, p[:free]...)
	overflow := p[free:]
	k := len(overflow)

	if k < c.size {
		// Slide the surviving bytes to the front and put the overflow
		// right behind them.
		copy(c.dict, c.dict[k:])
		copy(c.dict[c.size-k:], overflow)
		return
	}

	// The overflow alone covers the whole window: keep only its tail.
	copy(c.dict, overflow[k-c.size:])
}

// RandomString produces random strings whose characters are all drawn from
// a fixed set. Generate holds mu for the duration of each call.
type RandomString struct {
	mu   sync.Mutex // guards rand while a string is being generated
	rand *rand.Rand // random source, installed by init
	dict string     // characters a generated string may contain
}

var (
	// Alphabet generates strings made of decimal digits and lower- and
	// upper-case ASCII letters.
	Alphabet = (&RandomString{dict: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"}).init()

	// Numeric generates strings made of decimal digits only.
	Numeric = (&RandomString{dict: "0123456789"}).init()
)

// init installs a random source seeded from the current time and returns
// the receiver so that it can be used directly in a declaration.
func (c *RandomString) init() *RandomString {
	seed := time.Now().UnixNano()
	c.rand = rand.New(rand.NewSource(seed))
	return c
}

// Generate returns a string of n bytes, each picked at random from dict.
func (c *RandomString) Generate(n int) string {
	c.mu.Lock()
	defer c.mu.Unlock()

	out := make([]byte, n)
	choices := len(c.dict)
	for pos := range out {
		out[pos] = c.dict[c.rand.Intn(choices)]
	}
	return string(out)
}

func main() {
	var buf = bytes.NewBuffer(nil)
	var windowSize = BinaryPow(12)
	var sw = new(slideWindow).initialize(12)
	var fw, _ = flate.NewWriterWindow(buf, windowSize)
	var content = []byte(Alphabet.Generate(1000))
	fw.Write(content)
	fw.Flush()
	sw.Write(content)
	println(buf.Len())

	buf.Reset()
	fw.ResetDict(buf, sw.dict)
	fw.Write(content)
	fw.Flush()
	sw.Write(content)
	println(buf.Len())

	buf.Reset()
	fw.ResetDict(buf, sw.dict)
	fw.Write(content)
	fw.Flush()
	sw.Write(content)
	println(buf.Len())
}

I've observed that in Chrome, when the same content is sent over and over again, the message is compressed to a very small size.

Confirmed. Fix in #912

cool

By the way, is it not possible to use a custom window size and a compression level at the same time?

@lxzan It is a specialized compressor: https://github.com/klauspost/compress/blob/master/flate/level5.go#L312

So it offers "middle of the road" compression.