Compression Ratio for Float/Double Arrays
gmseed opened this issue · 0 comments
Hi
I'm new to Snappy-Java and wrote a basic test to examine the compression ratio for arrays of int, float and double. For small arrays of ~10 elements, the compressed int[] and float[] outputs are larger than the original arrays. For larger arrays of 1024 elements (random values in the range 0–100), int[] compresses to around 50% of its original size, but for float[]/double[] the compressed size is still greater than the original size, which surely defeats the purpose of running a compressor.
Maybe I'm doing something wrong; the results of the test functions, followed by my test code, are pasted below.
Results:
`Hello snappy-java! Snappy-java is a JNI-based wrapper of Snappy, a fast compresser/decompresser.
compressed length: 87, uncompressed length: 96
compressed int[] bytes: 42, uncompressed bytes: 40
compressed float[] bytes: 42, uncompressed bytes: 40
compressed double[] bytes: 74, uncompressed bytes: 80
compressed int[] bytes: 2208, uncompressed bytes: 4096
compressed float[] bytes: 4101, uncompressed bytes: 4096
compressed double[] bytes: 8197, uncompressed bytes: 8192`
Code:
`package utils.tocompress;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import org.junit.jupiter.api.Test;
import org.xerial.snappy.Snappy;
/**
 * Exercises snappy-java round trips on a string and on int/float/double arrays, printing the
 * compressed and uncompressed size of each input.
 *
 * <p>Every round trip is verified: a mismatch or an {@link IOException} fails the test with an
 * {@link AssertionError} instead of only being printed to the console.
 */
public class SnappyTest {

    /**
     * Website example: compresses a UTF-8 string, uncompresses it, prints both sizes and the
     * restored text, and checks that the restored text equals the input.
     */
    @Test
    public void test_website_example() {
        String input = "Hello snappy-java! Snappy-java is a JNI-based wrapper of Snappy, a fast compresser/decompresser.";
        try {
            byte[] compressed = Snappy.compress(input.getBytes("UTF-8"));
            byte[] uncompressed = Snappy.uncompress(compressed);
            String result = new String(uncompressed, "UTF-8");
            System.out.println(
                    "compressed length: " + compressed.length + ", uncompressed length: " + uncompressed.length);
            System.out.println(result);
            requireSame(input.equals(result), "String");
        } catch (IOException e) {
            // UnsupportedEncodingException is an IOException, so this single handler covers both.
            throw new AssertionError("Snappy round trip failed", e);
        }
    }

    /**
     * Arrays of numbers: round-trips three fixed 10-element arrays and three randomly generated
     * 1024-element arrays (values in the range 0..100), printing the sizes for each.
     */
    @Test
    public void test_arrays_of_numbers() {
        try {
            // Small fixed arrays.
            roundTrip(new int[] { 1, 111, 65, 888, 99, 45654, 112, 12, 45, 10 });
            roundTrip(new float[] { 1.1f, 111.22f, 65.65f, 888.8f, 99.01f, 45654.12f, 112.112f, 12.1f, 45.54f, 10.1f });
            roundTrip(new double[] { 1.1, 111.22, 65.65, 888.8, 99.01, 45654.12, 112.112, 12.1, 45.54, 10.1 });

            // Larger random arrays.
            int numValues = 1024;
            int min = 0;
            int max = 100;

            int[] ints_random = new int[numValues];
            for (int i = 0; i < numValues; i++) {
                // Upper bound of nextInt is exclusive, hence max + 1.
                ints_random[i] = ThreadLocalRandom.current().nextInt(min, max + 1);
            }
            roundTrip(ints_random);

            Random r = new Random();
            float[] floats_random = new float[numValues];
            for (int i = 0; i < numValues; i++) {
                floats_random[i] = min + r.nextFloat() * (max - min);
            }
            roundTrip(floats_random);

            double[] doubles_random = new double[numValues];
            for (int i = 0; i < numValues; i++) {
                doubles_random[i] = min + r.nextDouble() * (max - min);
            }
            roundTrip(doubles_random);
        } catch (IOException e) {
            throw new AssertionError("Snappy round trip failed", e);
        }
    }

    /** Compresses and uncompresses {@code values}, verifies the result, and prints both sizes. */
    private void roundTrip(int[] values) throws IOException {
        byte[] compressed = Snappy.compress(values);
        int[] restored = Snappy.uncompressIntArray(compressed);
        requireSame(isSameArray(values, restored), "int[]");
        report("int[]", compressed.length, values.length * Integer.BYTES);
    }

    /** Compresses and uncompresses {@code values}, verifies the result, and prints both sizes. */
    private void roundTrip(float[] values) throws IOException {
        byte[] compressed = Snappy.compress(values);
        float[] restored = Snappy.uncompressFloatArray(compressed);
        // Zero tolerance: the round trip is expected to reproduce every value exactly.
        requireSame(isSameArray(values, restored, 0.0f), "float[]");
        report("float[]", compressed.length, values.length * Float.BYTES);
    }

    /** Compresses and uncompresses {@code values}, verifies the result, and prints both sizes. */
    private void roundTrip(double[] values) throws IOException {
        byte[] compressed = Snappy.compress(values);
        double[] restored = Snappy.uncompressDoubleArray(compressed);
        // Zero tolerance: the round trip is expected to reproduce every value exactly.
        requireSame(isSameArray(values, restored, 0.0), "double[]");
        report("double[]", compressed.length, values.length * Double.BYTES);
    }

    /** Prints one size line in the same format for every element type. */
    private static void report(String type, int compressedBytes, int uncompressedBytes) {
        System.out.println(
                "compressed " + type + " bytes: " + compressedBytes + ", uncompressed bytes: " + uncompressedBytes);
    }

    /** Fails the running test when a round trip did not reproduce its input. */
    private static void requireSame(boolean same, String type) {
        if (!same) {
            throw new AssertionError("NOT SAME: " + type + " round trip did not reproduce the original data");
        }
    }

    /**
     * Compares two int arrays element by element.
     *
     * @param array1 first array
     * @param array2 second array
     * @return {@code true} if both arrays have the same length and equal elements at every index
     */
    public boolean isSameArray(int[] array1, int[] array2) {
        if (array1.length != array2.length) {
            return false;
        }
        for (int i = 0; i < array1.length; i++) {
            if (array1[i] != array2[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares two float arrays element by element within a tolerance.
     *
     * @param array1 first array
     * @param array2 second array
     * @param tol maximum allowed absolute difference between corresponding elements
     * @return {@code true} if both arrays have the same length and every pair of elements is
     *     either identical (including NaN with NaN) or differs by at most {@code tol}
     */
    public boolean isSameArray(float[] array1, float[] array2, float tol) {
        if (array1.length != array2.length) {
            return false;
        }
        for (int i = 0; i < array1.length; i++) {
            // Float.compare accepts identical values such as NaN/NaN or equal infinities, whose
            // difference is NaN; the negated <= makes NaN versus a number count as a mismatch.
            if (Float.compare(array1[i], array2[i]) != 0 && !(Math.abs(array1[i] - array2[i]) <= tol)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares two double arrays element by element within a tolerance.
     *
     * @param array1 first array
     * @param array2 second array
     * @param tol maximum allowed absolute difference between corresponding elements
     * @return {@code true} if both arrays have the same length and every pair of elements is
     *     either identical (including NaN with NaN) or differs by at most {@code tol}
     */
    public boolean isSameArray(double[] array1, double[] array2, double tol) {
        if (array1.length != array2.length) {
            return false;
        }
        for (int i = 0; i < array1.length; i++) {
            // Double.compare accepts identical values such as NaN/NaN or equal infinities, whose
            // difference is NaN; the negated <= makes NaN versus a number count as a mismatch.
            if (Double.compare(array1[i], array2[i]) != 0 && !(Math.abs(array1[i] - array2[i]) <= tol)) {
                return false;
            }
        }
        return true;
    }
} // class SnappyTest
`