zserge/jsmn

Iterating over even more complex json objects

dieggsy opened this issue · 5 comments

simple.c shows how to iterate over jsmn objects that contain JSMN_ARRAY - how does one iterate over jsmn objects where values are more JSMN_OBJECTs?

The .size trick shown with the arrays doesn't work for JSMN_OBJECT, because .size for objects is the number of key/value pairs in the current level object, with no knowledge of the size of the inner objects.

Thinking about it more, I feel like this is almost the definition of the sort of thing you need recursion for... I might get there eventually.

I've whipped something up to this effect:

#define JSMN_STRICT
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "jsmn.h"

static int jsoneq(const char *json, jsmntok_t *tok, const char *s) {
  if (tok->type == JSMN_STRING && (int)strlen(s) == tok->end - tok->start &&
      strncmp(json + tok->start, s, tok->end - tok->start) == 0) {
    return 0;
  }
  return -1;
}

static void json_print(const char* jstr, jsmntok_t *json) {
    if (json == NULL) {
        printf("Null!\n");
    } else {
        printf("%.*s\n", json[0].end - json[0].start, jstr+json[0].start);
    }
}

int skip_object (jsmntok_t *json) {
    if (json->type == JSMN_PRIMITIVE
        || json->type == JSMN_STRING) {
        return 1;
    }
    else {
        int i = 1, skip = 0;
        int size = json->type == JSMN_ARRAY ? json->size : json->size * 2;
        while (i <= size ) {
            if (json[i].type == JSMN_OBJECT
                || json[i].type == JSMN_ARRAY) {
                int local_skip = skip_object(&json[i]);
                skip += local_skip;
                i += local_skip;
                size += local_skip;
            } else {
                skip++;
                i++;
            }
        }
        return skip;
    }
}
static jsmntok_t *vjson_oref(const char* jstr, jsmntok_t* json, va_list keys) {
    char* key = va_arg(keys,char*);
    if (key == NULL) {
        return json;
    }
    if (json[0].type != JSMN_OBJECT) {
        return NULL;
    }
    int size = json->size * 2;
    int i = 1;
    while (i <= size) {
        if (json[i].type == JSMN_OBJECT) {
            int local_skip = skip_object(&json[i]);
            i+= local_skip + 1;
            size += local_skip;
            continue;
        }
        if (jsoneq(jstr, &json[i], key) == 0) {
            return vjson_oref(jstr, &json[i+1], keys);
        }
        ++i;
    }
    return NULL;
}

static jsmntok_t *json_oref(const char* jstr, jsmntok_t* json, ...) {
    va_list args;
    va_start(args, json);
    jsmntok_t* result = vjson_oref(jstr, json, args);
    va_end(args);
    return result;
}

int main() {
    int r;
    jsmn_parser p;
    jsmntok_t json[200];
    char* json_string = "{\"foo\": {\"bar\": {\"x\":1,\"y\":2}, \"baz\": 5}, \"qux\": 2}";

    jsmn_init(&p);
    r = jsmn_parse(&p, json_string, strlen(json_string), json, sizeof(json)/sizeof(json[0]));
    if (r == JSMN_ERROR_NOMEM) {
        printf("jsmntok_t too small!");
    } else if (r == JSMN_ERROR_INVAL) {
        printf("Invalid json!");
    } else if (r == JSMN_ERROR_PART) {
        printf("Too little json data!");
    }

    if (r < 1 || json[0].type != JSMN_OBJECT) {
        printf("Object expected\n");
        return 1;
    }

    jsmntok_t* obj = json_oref(json_string,json,"foo",NULL);
    printf("Level 1:      json[foo]: ");
    json_print(json_string, obj);

    obj = json_oref(json_string,json,"foo","bar",NULL);
    printf("Level 2: json[foo][bar]: ");
    json_print(json_string, obj);

    obj = json_oref(json_string,json,"nonexistent",NULL);
    printf("None: json[nonexistent]: ");
    json_print(json_string, obj);

    return 0;
}

Which prints:

Level 1:      json[foo]: {"bar": {"x":1,"y":2}, "baz": 5}
Level 2: json[foo][bar]: {"x":1,"y":2}
None: json[nonexistent]: Null!

It's possible skip_object and json_oref could be combined into a single function, but this is how I sort of logically broke it down in my head. I'll leave this open in case someone has input on this or a better solution, but feel free to close.

EDIT: Handle arrays properly, taking into account that they can themselves contain more objects/arrays.

pt300 commented

Honestly, myself I see room for improvement in the parser itself to make this task easier. Possibly add another field to token data with position of a sibling?

I come up with some nice C++ usage pattern, maybe someone will be interested:

mlabbe commented

Recursive functions to work around tok.size being non-hierarchical overcomplicates the issue. You can just:

  • Take a token of any type, including a nested object or array. Store its tok.end, say, as obj.end
  • Iterate on tokens until tok.start > obj.end

Now your iterator is past the object and all of its nested tokens, pointing at the first non-nested token.

jsmn tokens are ordered in source document order. Assume obj is the token of the nested object/array:

  1. tokens that appear in the document before the nested object/array will have a tok.end < obj.start.
  2. tokens that appear in the document after the nested object/array will have a tok.start > obj.end.
  3. tokens that are nested inside the nested object/array will have tok.start > obj.start and tok.end < obj.end.

Once you understand your data, a flat iterator over tokens works here.

// `*i` will be the index of the next non-nested token, or the last token.
// works for any type of token
static void
jsmn_nested_skip(const jsmntok_t* tok, int num_tokens, int* i)
{
    for (int char_end = tok[*i].end; *i < num_tokens && tok[*i].start < char_end; (*i)++)
        ; // pass
}