
exiv2 parse url crash

xinali opened this issue · 1 comments

Describe the bug

When Exiv2::Uri::Parse(url) is used to parse a malformed URL, it crashes.
From a security standpoint this is a very low-severity crash; you can decide whether or not to fix it.

To Reproduce
Edit samples/conntest.cpp as follows, then compile the latest commit of exiv2:

// ***************************************************************** -*- C++ -*-
// con-test.cpp
// Tester application for testing the http/https/ftp/ssh/sftp connection

#include <exiv2/exiv2.hpp>

    #include <curl/curl.h>

#include <cstdio>
#include <cstdlib>
#include <iostream>

// Parses `url` with Exiv2::Uri::Parse and fills an HTTP request dictionary
// from the parsed pieces.
//
// url        - URL text handed to Exiv2::Uri::Parse.
// useHttp1_0 - when false (the default) the request's "version" entry is set
//              to "1.1"; when true no "version" entry is written.
//
// The Exiv2::http() call is commented out, so no request is actually sent:
// the function only exercises Exiv2::Uri::Parse. Per the backtrace in this
// report, Uri::Parse can throw std::length_error on a malformed URL; nothing
// here catches it.
void httpcon(const std::string& url, bool useHttp1_0 = false) {
    Exiv2::Dictionary response;  // only referenced by the disabled Exiv2::http() call below
    Exiv2::Dictionary request;
    std::string       errors;    // only referenced by the disabled Exiv2::http() call below

    Exiv2::Uri uri = Exiv2::Uri::Parse(url);

    request["server"] = uri.Host;
    request["page"]   = uri.Path;
    request["port"]   = uri.Port;
    if (!useHttp1_0) request["version"] = "1.1";

    // int serverCode = Exiv2::http(request,response,errors);
    // if (serverCode < 0 || serverCode >= 400 || errors.compare("") != 0) {
    //     throw Exiv2::Error(Exiv2::kerTiffDirectoryTooLarge, "Server", serverCode);
    // }
}

void curlcon(const std::string& url, bool useHttp1_0 = false) {
    CURL* curl = curl_easy_init();
    if(!curl) {
        throw Exiv2::Error(Exiv2::kerErrorMessage, "Uable to init libcurl.");

    // get the timeout value
    std::string timeoutStr = Exiv2::getEnv(Exiv2::envTIMEOUT);
    long timeout = atol(timeoutStr.c_str());
    if (timeout == 0) {
        throw Exiv2::Error(Exiv2::kerErrorMessage, "Timeout Environmental Variable must be a positive integer.");

    std::string response;
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, Exiv2::curlWriter);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
    curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, timeout);
    //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); // debug
    if (useHttp1_0) curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
    else            curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);

    /* Perform the request, res will get the return code */
    CURLcode res = curl_easy_perform(curl);
    if(res != CURLE_OK) { // error happends
        throw Exiv2::Error(Exiv2::kerErrorMessage, curl_easy_strerror(res));

    // get return code
    long returnCode;
    curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &returnCode); // get code

    if (returnCode >= 400 || returnCode < 0) {
        throw Exiv2::Error(Exiv2::kerTiffDirectoryTooLarge, "Server", returnCode);

// Reads the whole file `filename` (opened in binary mode) into `*buffer`.
//
// filename - path of the file to read.
// buffer   - receives the file contents on success; left untouched on failure.
//
// Returns true when the complete file was read (an empty file yields an empty
// string), false when either argument is NULL, the file cannot be opened, its
// size cannot be determined, or fewer bytes than expected were read.
static bool ReadFileToString(const char *filename, std::string *buffer) {
  if (filename == NULL || buffer == NULL) {
    return false;
  }

  std::FILE *f = std::fopen(filename, "rb");
  if (f == NULL) {
    return false;
  }

  bool success = false;

  // Determine the size by seeking to the end; every step can fail and ftell
  // reports failure as -1, which must not be used as a length.
  if (std::fseek(f, 0, SEEK_END) == 0) {
    const long size = std::ftell(f);
    if (size >= 0 && std::fseek(f, 0, SEEK_SET) == 0) {
      // Read straight into a string so no separately managed buffer can leak.
      std::string contents(static_cast<std::string::size_type>(size), '\0');
      const std::size_t got =
          contents.empty() ? 0 : std::fread(&contents[0], 1, contents.size(), f);
      if (got == contents.size()) {
        buffer->swap(contents);
        success = true;
      }
    }
  }

  std::fclose(f);  // close the stream on every path once it has been opened
  return success;
}

int main(int argc,const char** argv)

    if (argc < 2) {
        std::cout << "Usage: " << argv[0] << " url {-http1_0}" << std::endl;
        return 1;
    std::string file_data;
    if (!ReadFileToString(argv[1], &file_data))
        printf("read file failed!\n");
        return 1;
    std::string url(file_data);
    Exiv2::Protocol prot = Exiv2::fileProtocol(url);

    bool useHttp1_0 = false;
    for ( int a = 1 ; a < argc ; a++ ) {
        std::string arg(argv[a]);
        if (arg == "-http1_0") useHttp1_0 = true;

    bool isOk = false;
    try {
        #ifdef EXV_USE_CURL
            if (prot == Exiv2::pHttp || prot == Exiv2::pHttps || prot == Exiv2::pFtp) {
                curlcon(url, useHttp1_0);
                isOk = true;
        if (!isOk && prot == Exiv2::pHttp) {
            httpcon(url, useHttp1_0);
            isOk = true;
    } catch (const Exiv2::AnyError& e) {
        std::cout << "Error: '" << e << "'" << std::endl;
        return -1;

    if (!isOk)  std::cout << "The protocol is unsupported." << std::endl;
    else        std::cout << "OK." << std::endl;
    return 0;

// That's all Folks!

Expected behavior

➜  build git:(master) ✗ gdb bin/conntest
pwndbg: loaded 170 commands. Type pwndbg [filter] for a list.
pwndbg: created $rebase, $ida gdb functions (can be used with print/break)
Reading symbols from bin/conntest...done.

pwndbg> r ./crashes/024b8d41af3621fe50cf5c840bad9bd3
Starting program: /home/exiv2/build/bin/conntest ./crashes/024b8d41af3621fe50cf5c840bad9bd3
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
file_name: ./crashes/024b8d41af3621fe50cf5c840bad9bd3
terminate called after throwing an instance of 'std::length_error'
  what():  basic_string::_M_create

Program received signal SIGABRT, Aborted.
0x00007ffff6750428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
54      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
─────────────────────────────────────────────────────[ REGISTERS ]─────────────────────────────────────────────────────
 RAX  0x0
 RBX  0x61c768 ◂— 'basic_string::_M_create'
 RCX  0xffffffffffffffff
 RDX  0x6
 RDI  0x84d2
 RSI  0x84d2
 R8   0x7ffff6ae1770 (_IO_stdfile_2_lock) ◂— 0x0
 R9   0x7ffff7fe8740 ◂— 0x7ffff7fe8740
 R10  0x8
 R11  0x206
 R12  0x622570 ◂— 0x0
 R13  0x7fffffffe0b0 —▸ 0x7fffffffe0c0 ◂— 0x0
 R14  0x6224b1 ◂— 0xe92e20ff7f00003f /* '?' */
 R15  0x7fffffffe130 —▸ 0x7fffffffe140 ◂— 0x3038 /* '80' */
 RBP  0x7ffff6ae0700 (stderr) —▸ 0x7ffff6ae0540 (_IO_2_1_stderr_) ◂— 0xfbad2887
 RSP  0x7fffffffdd68 —▸ 0x7ffff675202a (abort+362) ◂— mov    rdx, qword ptr fs:[0x10]
 RIP  0x7ffff6750428 (raise+56) ◂— cmp    rax, -0x1000 /* 'H=' */
──────────────────────────────────────────────────────[ DISASM ]───────────────────────────────────────────────────────
 ► 0x7ffff6750428 <raise+56>    cmp    rax, -0x1000
   0x7ffff675042e <raise+62>    ja     raise+96 <0x7ffff6750450>

   0x7ffff6750430 <raise+64>    ret

   0x7ffff6750432 <raise+66>    nop    word ptr [rax + rax]
   0x7ffff6750438 <raise+72>    test   ecx, ecx
   0x7ffff675043a <raise+74>    jg     raise+43 <0x7ffff675041b>
   0x7ffff675041b <raise+43>    movsxd rdx, edi
   0x7ffff675041e <raise+46>    mov    eax, 0xea
   0x7ffff6750423 <raise+51>    movsxd rdi, ecx
   0x7ffff6750426 <raise+54>    syscall
 ► 0x7ffff6750428 <raise+56>    cmp    rax, -0x1000
───────────────────────────────────────────────────────[ STACK ]───────────────────────────────────────────────────────
00:0000│ rsp  0x7fffffffdd68 —▸ 0x7ffff675202a (abort+362) ◂— mov    rdx, qword ptr fs:[0x10]
01:0008│      0x7fffffffdd70 ◂— 0x20 /* ' ' */
02:0010│      0x7fffffffdd78 ◂— 0x0
... ↓
─────────────────────────────────────────────────────[ BACKTRACE ]─────────────────────────────────────────────────────
 ► f 0     7ffff6750428 raise+56
   f 1     7ffff675202a abort+362
   f 2     7ffff709384d __gnu_cxx::__verbose_terminate_handler()+365
   f 3     7ffff70916b6
   f 4     7ffff7091701
   f 5     7ffff7091919
   f 6     7ffff70ba26f
   f 7     7ffff7123099
   f 8     7ffff76f4438
   f 9     7ffff76f4438
   f 10     7ffff76f4438
Program received signal SIGABRT

pwndbg> bt
#0  0x00007ffff6750428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
#1  0x00007ffff675202a in __GI_abort () at abort.c:89
#2  0x00007ffff709384d in __gnu_cxx::__verbose_terminate_handler() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#3  0x00007ffff70916b6 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#4  0x00007ffff7091701 in std::terminate() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#5  0x00007ffff7091919 in __cxa_throw () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#6  0x00007ffff70ba26f in std::__throw_length_error(char const*) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#7  0x00007ffff7123099 in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create(unsigned long&, unsigned long) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#8  0x00007ffff76f4438 in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > (this=<optimized out>, __beg=..., __end=...) at /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/basic_string.tcc:223
#9  std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct_aux<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > (this=<optimized out>, __beg=..., __end=...) at /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/basic_string.h:195
#10 std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > (this=<optimized out>, __beg=..., __end=...) at /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/basic_string.h:214
#11 std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, void> (__beg=..., __end=..., this=<optimized out>, __a=...) at /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/basic_string.h:537
#12 Exiv2::Uri::Parse (uri=...) at /home/exiv2/src/futils.cpp:465
Python Exception <class 'gdb.error'> There is no member named _M_dataplus.:
#13 0x0000000000401f46 in httpcon (url=, useHttp1_0=false) at /home/exiv2/samples/conntest.cpp:19
#14 0x00000000004043f5 in main (argc=<optimized out>, argc@entry=2, argv=argv@entry=0x7fffffffe378) at /home/exiv2/samples/conntest.cpp:141
#15 0x00007ffff673b830 in __libc_start_main (main=0x403590 <main(int, char const**)>, argc=2, argv=0x7fffffffe378, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7fffffffe368) at ../csu/libc-start.c:291
#16 0x0000000000401d99 in _start ()

Desktop (please complete the following information):

  • OS: Linux
  • Compiler: clang-6
  • Compile with debug mode

Additional context
It's not an important issue; if you decide not to fix it, you can ignore it.

@xinali: you forgot to attach your test file, crashes/024b8d41af3621fe50cf5c840bad9bd3.
But I guess it contains a url like this: "http://example.com?xx/yyy"
If the ? comes before the / then pathStart > queryStart on line 465, which causes a std::length_error exception to be thrown.