xerial/snappy-java

Compression Ratio for Float/Double Arrays

gmseed opened this issue · 0 comments

Hi

I'm new to Snappy-Java and wrote a basic test to examine the compression ratio for arrays of int, float and double. I'm finding that for small arrays of ~10 elements the compressed bytes are greater than the original arrays. For larger arrays of ~1,000 elements (1024 in the test below), int[] compression is around 50%, but for float[]/double[] the compressed size is greater than the original size, which surely defeats the purpose of running a compressor.

Maybe I'm doing something wrong. The results of the test functions are shown first, followed by my test code.

Results:

```
Hello snappy-java! Snappy-java is a JNI-based wrapper of Snappy, a fast compresser/decompresser.
compressed length: 87, uncompressed length: 96
compressed int[] bytes: 42, uncompressed bytes: 40
compressed float[] bytes: 42, uncompressed bytes: 40
compressed double[] bytes: 74, uncompressed bytes: 80
compressed int[] bytes: 2208, uncompressed bytes: 4096
compressed float[] bytes: 4101, uncompressed bytes: 4096
compressed double[] bytes: 8197, uncompressed bytes: 8192
```

Code:

`package utils.tocompress;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

import org.junit.jupiter.api.Test;
import org.xerial.snappy.Snappy;

/**
 * Round-trip tests that push a string and primitive arrays through
 * {@code Snappy.compress} / {@code Snappy.uncompress*} and print the compressed
 * size next to the uncompressed size.
 *
 * <p>Unlike a print-only check, every test fails (via {@link AssertionError}) when
 * the uncompressed data differs from the input or when Snappy reports an
 * {@link IOException}.
 */
public class SnappyTest {

	/**
	 * Website example: compresses a UTF-8 string, uncompresses it, prints both
	 * lengths and the restored text, and verifies the text survived unchanged.
	 */
	@Test
	public void test_website_example() {
		String input = "Hello snappy-java! Snappy-java is a JNI-based wrapper of Snappy, a fast compresser/decompresser.";
		try {
			byte[] compressed = Snappy.compress(input.getBytes("UTF-8"));
			byte[] uncompressed = Snappy.uncompress(compressed);

			String result = new String(uncompressed, "UTF-8");
			System.out.println(
					"compressed length: " + compressed.length + ", uncompressed length: " + uncompressed.length);
			System.out.println(result);
			if (!input.equals(result)) {
				throw new AssertionError("string round trip through Snappy changed the data");
			}
		} catch (IOException e) {
			// UnsupportedEncodingException is an IOException, so this one handler covers
			// both the charset lookup and the Snappy calls. Fail instead of swallowing.
			throw new AssertionError("Snappy string round trip failed", e);
		}
	}

	/**
	 * Arrays of numbers: round-trips a small hand-written array and a 1024-element
	 * random array (values in the range 0..100) for each of int, float and double,
	 * printing compressed versus uncompressed byte counts for every case.
	 */
	@Test
	public void test_arrays_of_numbers() {
		try {
			// Small fixed arrays (10 elements each).
			roundTripInts(new int[] { 1, 111, 65, 888, 99, 45654, 112, 12, 45, 10 });
			roundTripFloats(
					new float[] { 1.1f, 111.22f, 65.65f, 888.8f, 99.01f, 45654.12f, 112.112f, 12.1f, 45.54f, 10.1f });
			roundTripDoubles(
					new double[] { 1.1, 111.22, 65.65, 888.8, 99.01, 45654.12, 112.112, 12.1, 45.54, 10.1 });

			// Larger random arrays.
			int numValues = 1024;
			int min = 0;
			int max = 100;

			int[] intsRandom = new int[numValues];
			for (int i = 0; i < numValues; i++) {
				intsRandom[i] = ThreadLocalRandom.current().nextInt(min, max + 1);
			}
			roundTripInts(intsRandom);

			Random r = new Random();
			float[] floatsRandom = new float[numValues];
			for (int i = 0; i < numValues; i++) {
				floatsRandom[i] = min + r.nextFloat() * (max - min);
			}
			roundTripFloats(floatsRandom);

			double[] doublesRandom = new double[numValues];
			for (int i = 0; i < numValues; i++) {
				doublesRandom[i] = min + r.nextDouble() * (max - min);
			}
			roundTripDoubles(doublesRandom);
		} catch (IOException e) {
			// Surface Snappy failures as a test failure rather than a printed stack trace.
			throw new AssertionError("Snappy array round trip failed", e);
		}
	}

	/** Compresses and uncompresses an int[], prints the sizes, and fails on any difference. */
	private void roundTripInts(int[] original) throws IOException {
		byte[] compressed = Snappy.compress(original);
		int[] restored = Snappy.uncompressIntArray(compressed);
		System.out.println(
				"compressed int[] bytes: " + compressed.length + ", uncompressed bytes: " + (original.length * Integer.BYTES));
		if (!isSameArray(original, restored)) {
			throw new AssertionError("int[] round trip through Snappy changed the data");
		}
	}

	/** Compresses and uncompresses a float[], prints the sizes, and fails on any difference. */
	private void roundTripFloats(float[] original) throws IOException {
		byte[] compressed = Snappy.compress(original);
		float[] restored = Snappy.uncompressFloatArray(compressed);
		System.out.println(
				"compressed float[] bytes: " + compressed.length + ", uncompressed bytes: " + (original.length * Float.BYTES));
		// Tolerance 0: the round trip is expected to be lossless, so any drift is a failure.
		if (!isSameArray(original, restored, 0.0f)) {
			throw new AssertionError("float[] round trip through Snappy changed the data");
		}
	}

	/** Compresses and uncompresses a double[], prints the sizes, and fails on any difference. */
	private void roundTripDoubles(double[] original) throws IOException {
		byte[] compressed = Snappy.compress(original);
		double[] restored = Snappy.uncompressDoubleArray(compressed);
		System.out.println(
				"compressed double[] bytes: " + compressed.length + ", uncompressed bytes: " + (original.length * Double.BYTES));
		// Tolerance 0: the round trip is expected to be lossless, so any drift is a failure.
		if (!isSameArray(original, restored, 0.0)) {
			throw new AssertionError("double[] round trip through Snappy changed the data");
		}
	}

	/**
	 * Tells whether two int arrays have the same length and the same elements in
	 * the same order.
	 *
	 * @param array1 first array; may be {@code null}
	 * @param array2 second array; may be {@code null}
	 * @return {@code true} if both are {@code null} or hold equal contents
	 */
	public boolean isSameArray(int[] array1, int[] array2) {
		return java.util.Arrays.equals(array1, array2);
	}

	/**
	 * Tells whether two float arrays have the same length and element-wise agree
	 * to within {@code tol}.
	 *
	 * <p>Elements that are identical according to {@link Float#compare} (including
	 * NaN against NaN and equal infinities) always match. Otherwise the absolute
	 * difference must be {@code <= tol}; a NaN difference never matches.
	 *
	 * @param array1 first array
	 * @param array2 second array
	 * @param tol    largest absolute difference still treated as equal
	 * @return {@code true} if the arrays match within the tolerance
	 */
	public boolean isSameArray(float[] array1, float[] array2, float tol) {
		if (array1.length != array2.length) {
			return false;
		}
		for (int i = 0; i < array1.length; i++) {
			float a = array1[i];
			float b = array2[i];
			if (Float.compare(a, b) == 0) {
				continue;
			}
			// Written as a negated <= so that a NaN difference counts as a mismatch.
			if (!(Math.abs(a - b) <= tol)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Tells whether two double arrays have the same length and element-wise agree
	 * to within {@code tol}.
	 *
	 * <p>Elements that are identical according to {@link Double#compare} (including
	 * NaN against NaN and equal infinities) always match. Otherwise the absolute
	 * difference must be {@code <= tol}; a NaN difference never matches.
	 *
	 * @param array1 first array
	 * @param array2 second array
	 * @param tol    largest absolute difference still treated as equal
	 * @return {@code true} if the arrays match within the tolerance
	 */
	public boolean isSameArray(double[] array1, double[] array2, double tol) {
		if (array1.length != array2.length) {
			return false;
		}
		for (int i = 0; i < array1.length; i++) {
			double a = array1[i];
			double b = array2[i];
			if (Double.compare(a, b) == 0) {
				continue;
			}
			// Written as a negated <= so that a NaN difference counts as a mismatch.
			if (!(Math.abs(a - b) <= tol)) {
				return false;
			}
		}
		return true;
	}

} // class SnappyTest
`