ZJONSSON/node-unzipper

Incorrect decoding when parsing an archive with a file with Cyrillic characters

grdvsng opened this issue · 2 comments

I'm using an archive that contains only one file, with a content length of 5000K.

main test:

import { createReadStream } from 'fs';
import * as path from 'path';
import * as unzipper from 'unzipper';

import { checkInvalideByte } from '../string-helper';

// Allow the streaming test up to 30 seconds before Jest aborts it.
jest.setTimeout(30 * 1000);

// Fixture archive, resolved relative to this test file.
const TEST_ARCHIVE_PATH = path.join(
  __dirname,
  './mock_data/archive-with-russian-chars.zip',
);

/**
 * Reproduction: pipes the fixture archive through `unzipper.ParseOne()` and
 * runs every emitted chunk through `checkInvalideByte`. The test passes when
 * the promise below rejects.
 */
describe('archive-with-cyrillic -chars', () => {
  describe('unzipper lib test', () => {
    it('should incorrect decode when use readable stream', async () => {
      const source = createReadStream(TEST_ARCHIVE_PATH);

      // Settles on whichever comes first: the source file stream ending or
      // erroring, or a chunk failing the replacement-character check.
      const ended = new Promise((resolve, reject) => {
        source.on('end', resolve);
        source.on('error', reject);

        const extractor = unzipper.ParseOne();

        extractor.on('data', (chunk) => {
          // NOTE(review): each chunk is converted to a string on its own.
          // Presumably a multi-byte UTF-8 character that straddles two chunks
          // decodes to U+FFFD on both sides, which would explain the failure
          // without any fault in the archive parsing. Confirm by decoding
          // through a stateful decoder (e.g. `string_decoder`) instead.
          const decoded = chunk.toString();

          checkInvalideByte(decoded).catch(reject);
        });

        source.pipe(extractor);
      });

      await expect(ended).rejects.toThrow();
    });
  });
});

string-helper.ts

/**
 * Checks `content` for the Unicode replacement character (U+FFFD), which
 * appears when bytes could not be decoded as valid text.
 *
 * The content is split on `\n` and scanned line by line; only the first
 * occurrence is reported.
 *
 * @param content - Already-decoded text to inspect.
 * @returns A promise that resolves when no replacement character is found.
 *   It rejects with an `Error` whose message gives the 1-based line and
 *   column of the first occurrence, followed by up to 10 characters of
 *   preceding context and the offending character itself.
 */
export function checkInvalideByte(content: string): Promise<void> {
  return new Promise((resolve, reject) => {
    const replacementChar = '\uFFFD';
    const contextLength = 10;

    for (const [index, line] of content.split('\n').entries()) {
      const column = line.indexOf(replacementChar);

      if (column >= 0) {
        // Clamp to 0: a negative start would make `slice` count from the end
        // of the line and produce an empty or unrelated context snippet when
        // the bad character sits within the first 10 columns.
        const contextStart = Math.max(0, column - contextLength);
        const text = `Invalide byte decoded at line: ${index + 1} column: ${
          column + 1
        } ('${line.slice(contextStart, column + 1)}')`;

        reject(new Error(text));
        // Only the first hit is reported; stop scanning.
        return;
      }
    }

    resolve();
  });
}

Hey, @grdvsng did you solve it? I'm having the same problem.