/lodepng-turbo

lodepng-turbo is a fast PNG image codec that uses SIMD instructions (MMX, SSE2, AVX2, NEON) to accelerate baseline PNG decompression on x86, x86-64, and ARM systems.

Primary language: C++ · License: zlib

lodepng-turbo

lodepng-turbo is a PNG image codec that uses SIMD instructions (MMX, SSE2, AVX2, NEON) to accelerate baseline PNG decompression on x86, x86-64, and ARM systems.

Background

lodepng-turbo is a PNG image codec that uses SIMD instructions (MMX, SSE2, AVX2, NEON) to accelerate baseline PNG decompression on x86, x86-64, and ARM systems. On those systems lodepng-turbo is twice as fast as libpng; on other platforms it may be slower. Its highly optimized libdeflate and depng prediction routines perform significantly better than both the original lodepng and libpng.

lodepng-turbo implements both the traditional lodepng API and the less powerful but more direct TurboPNG API.

How to build

Build lodepng-turbo with gcc/clang and GNU make. For Windows, the built library can also be used under msvc.

$ git submodule init
$ git submodule update
$ make

When it is built, static libraries and shared libraries are created.

liblodepngturbo.dll (liblodepngturbo.so on non-Windows systems) is a shared library and can be used on its own. On Windows, link against the import library (liblodepngturbo.lib).

liblodepngturbostatic.a (lib) is a static library and can be statically linked to your library. In this case libdeflatestatic.a also needs to be linked.

for MSVC

Attention: When using lodepng-turbo with MSVC, the C++ API of lodepng(-turbo) built with msys2 (g++) cannot be used, because g++ and MSVC use different name-mangling conventions. (However, the C++ API is not generated by default.)

The msys2 g++ is required even when building this library with MSVC: build libdeflatestatic.lib in the msys2 environment first.

Then open the solution file in the vstudio folder and build it with MSVC. The project file is configured to build the static library of lodepng-turbo, so change the project settings if you want a DLL instead.

Loading sample

#include <QtGui>
#include "lodepng.h"

int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);
    QString infilename("in.png");
    QString outfilename("out.png");

    if(argc <= 1) {
        qDebug() << "Usage: qt_lodepng_test [in.png] [out.png]";
        return 0;
    }
    if(argc > 1)
        infilename = QString(argv[1]);
    if(argc > 2)
        outfilename = QString(argv[2]);

    QByteArray bytes;
    {
        QFile fl(infilename);
        fl.open(QFile::ReadOnly);
        bytes = fl.readAll();
        fl.close();
    }
    unsigned char* out = nullptr; // lodepng decodes a dynamically allocated bitmap into a buffer
    unsigned width,height;
    unsigned result;
    LodePNGState state; // png state

    state.inspected = 3; // dummy initialize value
    qDebug() << "before init:" << state.inspected; // will be 3
    lodepng_state_init(&state);
    qDebug() << "after init:" << state.inspected; // will be 0

    // check png header and get basical metadata
    result = lodepng_inspect(&width, &height, &state, (unsigned char*)bytes.data(), bytes.size());
    qDebug() << "inspect:" << result << width << height << state.info_png.color.colortype << state.info_raw.colortype << state.inspected; // will be 1
    state.decoder.color_convert = 0; // skip color converting

    QImage::Format fmt = QImage::Format_Indexed8;
    switch(state.info_png.color.colortype) {
        case LodePNGColorType::LCT_GREY: fmt = QImage::Format_Grayscale8; break;
        case LodePNGColorType::LCT_RGB: fmt = QImage::Format_RGB888; break;
        case LodePNGColorType::LCT_PALETTE: fmt = QImage::Format_Indexed8; break;
        case LodePNGColorType::LCT_RGBA: fmt = QImage::Format_RGBA8888; break;
        case LodePNGColorType::LCT_GREY_ALPHA: fmt = QImage::Format_RGBA8888; break;
    }
    qDebug() << "colortype" << state.info_png.color.colortype << "format" << fmt;
    if(state.info_png.color.colortype == LodePNGColorType::LCT_GREY_ALPHA) {
        // LCT_GREY_ALPHA -> LCT_RGBA
        result = lodepng_decode32(&out, &width, &height, (unsigned char*)bytes.data(), bytes.size());
    } else {
        result = lodepng_decode(&out, &width, &height, &state, (unsigned char*)bytes.data(), bytes.size());
        qDebug() << "decode:" << result << width << height << state.info_png.color.colortype << state.info_raw.colortype << fmt;
    }

    QImage img(QSize(width, height), fmt);
    // LodePNGColorType::LCT_PALETTE has palette
    if(state.info_png.color.palettesize > 0) {
        QVector<QRgb> palettes(state.info_png.color.palettesize);
        unsigned char* pal = state.info_png.color.palette;
        for(int i = 0; i < state.info_png.color.palettesize; i++) {
            // on x86 or x64 CPUs, must be swapped between R and B.
            palettes[i] = (pal[4*i+3] << 24) | (pal[4*i+0] << 16) | (pal[4*i+1] <<8) | pal[4*i+2];
        }
        img.setColorTable(palettes);
        qDebug() << "palette set completed:" << state.info_png.color.palettesize;
    }
    // Since Bitmap pads each scanline with 4 bytes, there is a case that there is a gap in the byte stream.
    int bytewidth = width*img.depth()/8;
    if(bytewidth == img.bytesPerLine())
    {
        memcpy(img.bits(), out, img.byteCount());
    } else {
        qDebug() << "bytesPerLine" << img.bytesPerLine() << "bytewidth" << bytewidth;
        for(int y = 0; y < img.height(); y++)
            memcpy(img.scanLine(y), &out[y*bytewidth], bytewidth);
    }
    // Some PNG images may use transparent colors
    // instead of using alpha values to describe transparency,
    // but lodepng does not take this into consideration.
    // (When Convert is Disabled)
    if(state.info_png.color.key_defined) {
        QRgb mask = QRgb(((state.info_png.color.key_r & 0xff) << 16)
                         | ((state.info_png.color.key_g & 0xff) << 8)
                         | (state.info_png.color.key_b & 0xff));
        qDebug() << "MaskOutColor:" << mask;
        qDebug() << "depth" << img.depth();
        img = img.convertToFormat(QImage::Format_ARGB32);
        for(QRgb* pix = (QRgb*)img.bits(); (uchar*)pix < img.bits()+img.byteCount(); pix++) {
            if(((*pix - mask) & 0x00ffffff) == 0x0)
                    *pix = 0x0;
        }
    }
    img.save(outfilename);
    free(out);

    a.exit();
    return 0;
}

Acknowledgments

lodepng is a very easy-to-understand png library, which is the foundation of this library.

libdeflate is a very fast zlib compatible codec, and substantial decode processing is realized with this library.

SimdTests is a public-domain project for testing SIMD-optimized functions, mostly related to 2D computer graphics.

License

zlib

Copyright

Copyright 2019 KATO Kanryu <k.kanryu@gmail.com>