kapenga/LittleBit

reduce header size

da0ka opened this issue · 2 comments

da0ka commented

cost of literal count and literals will be a few bytes smaller.

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;

class CanonicalHuffmanTree {

private int[] depths;
private HuffmanNode[][] frequencyArray;
private HuffmanNode endOfLineSymbol;
private int firstOccurrence, lastOccurrence, biggestCount;


CanonicalHuffmanTree(HuffmanNode[] frequencies){
	endOfLineSymbol = frequencies[0];
	HuffmanNode root = HuffmanNode.method2(frequencies);
	HuffmanNode[][] newFreqs = new HuffmanNode[64][];
	int[] depths = this.depths = new int[64];
	root.getDepths(depths, 0);
	firstOccurrence = lastOccurrence = biggestCount = 0;

	for(int i = depths.length,d;i>0;){
		newFreqs[--i] = new HuffmanNode[d=depths[i]];
		if(d>0){
			if(d > biggestCount)
				biggestCount = d;
			if(lastOccurrence == 0)
				lastOccurrence = i;
			firstOccurrence = i;
		}
	}
	for (HuffmanNode f : frequencies){
		int i = f.depth;
		newFreqs[i][--depths[i]] = f;
	}

	//Restore the depths values
	for(int i = newFreqs.length;i>0;)
		depths[--i] = newFreqs[i].length;

	//Move the literals to the front
	for(int i = firstOccurrence; i <= lastOccurrence; i++)
		for(int x = 0,literalIndex = 0; x < depths[i]; x++)
			if(newFreqs[i][x].a == null){ //Literal
				if(x > literalIndex){
					HuffmanNode temp = newFreqs[i][literalIndex];
					newFreqs[i][literalIndex] = newFreqs[i][x];
					newFreqs[i][x] = temp;
				}
				literalIndex++;
			}

	//Assign new canonical bits.
	BitSet bits = new BitSet(firstOccurrence,0);
	for(int i = firstOccurrence; i <= lastOccurrence; i++){
		for(int x = 0; x < depths[i];bits.increase())
			newFreqs[i][x++].bitSet = bits.reverse();
		bits.increaseLength();
		bits.shiftLeft();
	}
	this.frequencyArray = newFreqs;
}

private static int bitSize(long i){
	int result = 0;
	while(i > 0){
		result++;
		i>>>=1;
	}
	return result;
}

void writeTree(BitStreamWriter writer) throws IOException {
	long startLength = writer.length();

	int bitSize = bitSize(biggestCount),m=256;
	int[] M=new int[m];
	BitSet bits = new BitSet(5, bitSize);
	BitSet first = new BitSet(6, firstOccurrence);
	BitSet last = new BitSet(6, lastOccurrence);
	bits.write(writer);
	first.write(writer);
	last.write(writer);

	for(int i = firstOccurrence; i <= lastOccurrence;)
		(new BitSet(bitSize, depths[i++])).write(writer);

	endOfLineSymbol.bitSet.write(writer); //end of line symbol first
	for(int i = firstOccurrence,d,e=257; i <= lastOccurrence; i++){
		if((d=depths[i]) > 0){
			HuffmanNode[] F=frequencyArray[i];
			int literalCount = 0;
			for (int x = 0; x < d&&F[x++].a == null;)literalCount++;
			BitSet amount = new BitSet(bitSize(d<e?d:e), literalCount);
			amount.write(writer);e-=literalCount;
			for (int x = 0; x < literalCount; x++)
				if (F[x].symbol > -1){
					int c,n=0,s=F[x].symbol;
					for(M[s]=1;s>0;)if(M[--s]<1)n++;
					s=bitSize(m);c=(1<<s)-m--;
					if(n<c)(new BitSet(s-1,n)).write(writer);
					else{n+=c;
						(new BitSet(s-1,n>>1)).write(writer);
						(new BitSet(1,n&1)).write(writer);
					}
				}
			for (int x = literalCount; x < d; x++){
				F[x].a.bitSet.write(writer);
				F[x].b.bitSet.write(writer);
			}
		}
	}
	long endLength = writer.length();
	System.out.println("Size of library:\t" + (endLength - startLength) + " bytes");
}

static DecodeNode readTree(BitStreamReader reader) throws Exception {
	int bitSize = (int)BitSet.read(reader, 5).getValue(),m=256;
	int firstOccurrence = (int)BitSet.read(reader, 6).getValue();
	int lastOccurrence = (int)BitSet.read(reader, 6).getValue();

	BitSet[][] huffbits = new BitSet[lastOccurrence+1][];
	int[] depths = new int[lastOccurrence+1], M=new int[m];
	for(int i = firstOccurrence; i <= lastOccurrence; i++)
		huffbits[i] = new BitSet[depths[i]=(int)BitSet.read(reader, bitSize).getValue()];

	//Assign new canonical bits.
	BitSet bits = new BitSet(firstOccurrence,0);
	HashSet<BitSet> leafs = new HashSet<>();
	for(int i = firstOccurrence; i <= lastOccurrence; i++){
		for(int x = 0; x < depths[i];bits.increase())
			leafs.add(huffbits[i][x++] = bits.copy());
		bits.increaseLength();
		bits.shiftLeft();
	}

	DecodeNode root = new DecodeNode(leafs, new BitSet(0));
	HashMap<BitSet, DecodeNode> dictionary = new HashMap<>();
	root.addToDictionary(dictionary);

	DecodeNode[][] nodes = new DecodeNode[lastOccurrence+1][];
	for(int i = firstOccurrence,x; i <= lastOccurrence; i++)
		for(nodes[i] = new DecodeNode[x=depths[i]];x>0;)
			nodes[i][--x] = dictionary.get(huffbits[i][x]);

	DecodeNode endOfLineSymbol = root.get(reader);
	endOfLineSymbol.data = new byte[0];

	for(int i = firstOccurrence,d,e=257; i <= lastOccurrence; i++)
		if((d=depths[i]) > 0){
			int literalCount = (int) BitSet.read(reader, bitSize(d<e?d:e)).getValue();
			e-=literalCount;
			for (int x = 0; x < literalCount; x++)
				if (nodes[i][x] != endOfLineSymbol){
					int c=0,s=bitSize(m),n=(1<<s)-m--;
					s=(int) BitSet.read(reader,s-1).getValue();
					if(s>=n)s+=s-n+(int) BitSet.read(reader,1).getValue();
					for(;M[c]>0||s-->0;)c++;M[c]=1;
					nodes[i][x].setData(new byte[]{(byte)c});
				}
			for (int x = literalCount; x < d;)
				nodes[i][x++].setToResolveNodes(root.get(reader), root.get(reader));
		}
	for(int i = firstOccurrence; i <= lastOccurrence; i++)
		for(int x = depths[i];x>0;)nodes[i][--x].resolve();
	return root;
}

}`

Thank you for your suggestion and sorry for not replaying earlier.
I am delighted to see someone actually taking the time to read my code and suggest improvements. Thank you for that!

You have a very compacted style of programming that makes it a bit hard to follow. After a while I was able to decipher what you did and why it is an improvement. I am planning to rewrite the code to reflect the style of my coding and include the suggestion into the base. Thank you.

It is added to LittleBit 0.3. Thanks for contributing.