Json serialization/parsing
Closed this issue · 12 comments
The official python library for protobuf from google has a json format, and it is found in the conformance tests proto definition so this could tie with #27
Considering the zig library has a json parser, it shouldn't be too hard to do.
Actually, it's in the spec! https://protobuf.dev/programming-guides/proto3/#json
Hi, i've been trying to implement this feature (started from deserializing) but i'm stuck on a few moments - maybe you will have a time and desire to help me)
First of all, std.json library (as of 0.12/0.13) is quite good and (almost) can parse a protobuffed JSON string into generated class - but the problem is ArrayList fields since JSON parser treats them as structs (it's waiting for JSON collections with fields items, capacity etc.).
My solution was to inherit an ArrayList with usingnamespace and add jsonParse function to it (see this and this), call it something like SerializableArrayList, place it somewhere at src/protobuf.zig (and also use this class in bootstrapped-generator/main.zig so all generated classes would use it). But when i did this i get lots of errors like this:
src/protobuf.zig:593:49: error: no field named 'items' in struct 'protobuf.SerializableArrayList(generated.google.protobuf.pb.Value)'
for (@field(result, field_name).items) |item| {
^~~~~
src/protobuf.zig:95:12: note: struct declared here
return struct {
^~~~~~
referenced by:
pb_deinit__anon_14611: src/protobuf.zig:572:21
deinit: src/protobuf.zig:1014:29
deinit_field__anon_14163: src/protobuf.zig:587:54
deinit_field__anon_13815: src/protobuf.zig:620:37
pb_deinit__anon_13489: src/protobuf.zig:572:21
deinit: src/protobuf.zig:1014:29
deinit_field__anon_9994: src/protobuf.zig:584:35
pb_deinit__anon_8240: src/protobuf.zig:572:21
deinit: src/protobuf.zig:1014:29
test.Required.Proto3.ProtobufInput.ValidDataRepeated.BOOL.PackedInput.ProtobufOutput: tests/alltypes.zig:80:12
Even simple example doesn't work as expected:
const std = @import("std");
const json = std.json;
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
pub fn SerializableArrayList(comptime T: type) type {
return struct {
usingnamespace ArrayList(T);
pub fn jsonParse(
allocator: Allocator,
source: anytype,
options: json.ParseOptions,
) !@This() {
const slice = json.innerParse([]T, allocator, source, options);
return @This().fromOwnedSlice(allocator, slice);
}
};
}
const S = struct {
array: SerializableArrayList(u32),
};
// This one works as expected ...
pub fn main_() !void {
const allocator = std.heap.page_allocator;
const t = SerializableArrayList(u32);
var x = t.init(allocator);
defer x.deinit();
try x.append(1);
try x.append(3);
_ = try t.jsonParse(allocator, 1, .{});
const writer = std.io.getStdOut().writer();
try writer.print("{d}: {any}\n", .{ 1, x.items });
if (!std.meta.hasFn(SerializableArrayList(u32), "jsonParse")) {
@compileError("AAA");
}
}
// ... but not this one
pub fn main() !void {
const allocator = std.heap.page_allocator;
const t = SerializableArrayList(u32);
var x = t.init(allocator);
defer x.deinit();
try x.append(1);
try x.append(3);
const s_instance = S{ .array = x };
const writer = std.io.getStdOut().writer();
try writer.print(
"{d}: {any}\n",
.{ 1, @field(s_instance, "array").items },
);
}
Second main() fails at compile time with this messages:
t.zig:57:28: error: expected type 't.SerializableArrayList(u32)', found 'array_list.ArrayListAligned(u32,null)'
/home/libro/Downloads/zig/zig-0.13.0/lib/std/array_list.zig:31:12: note: struct declared here
t.zig:8:12: note: struct declared here
These are changes i made:
diff --git a/bootstrapped-generator/main.zig b/bootstrapped-generator/main.zig
index 8bb0297..018d8bd 100644
--- a/bootstrapped-generator/main.zig
+++ b/bootstrapped-generator/main.zig
@@ -98,13 +98,13 @@ const GenerationContext = struct {
try list.append(try std.fmt.allocPrint(allocator,
\\// Code generated by protoc-gen-zig
- \\ ///! package {s}
+ \\///! package {s}
\\const std = @import("std");
\\const Allocator = std.mem.Allocator;
- \\const ArrayList = std.ArrayList;
\\
\\const protobuf = @import("protobuf");
\\const ManagedString = protobuf.ManagedString;
+ \\const SerializableArrayList = protobuf.SerializableArrayList;
\\const fd = protobuf.fd;
\\
, .{name.buf}));
@@ -314,7 +314,7 @@ const GenerationContext = struct {
}
}
} else {
- prefix = "ArrayList(";
+ prefix = "SerializableArrayList(";
postfix = ")";
}
diff --git a/src/protobuf.zig b/src/protobuf.zig
index dd19967..5fa3178 100644
--- a/src/protobuf.zig
+++ b/src/protobuf.zig
@@ -3,6 +3,7 @@ const StructField = std.builtin.Type.StructField;
const isIntegral = std.meta.trait.isIntegral;
const Allocator = std.mem.Allocator;
const testing = std.testing;
+const json = std.json;
// common definitions
@@ -77,6 +78,21 @@ pub const ManagedString = union(ManagedStringTag) {
}
};
+pub fn SerializableArrayList(comptime T: type) type {
+ return struct {
+ usingnamespace ArrayList(T);
+
+ pub fn jsonParse(
+ allocator: Allocator,
+ source: anytype,
+ options: json.ParseOptions,
+ ) !@This() {
+ const slice = json.innerParse([]T, allocator, source, options);
+ return @This().fromOwnedSlice(allocator, slice);
+ }
+ };
+}
+
/// Enum describing the different field types available.
pub const FieldTypeTag = enum { Varint, FixedInt, SubMessage, String, List, PackedList, OneOf };
@@ -964,26 +980,12 @@ fn decode_json_data(comptime T: type, comptime field_desc: FieldDescriptor, comp
}
pub fn json_decode(comptime T: type, input: []const u8, allocator: Allocator) !T {
- const m : json.Parsed(json.Value.object) = try json.parseFromSlice(json.Value.object, allocator, input, .{});
- defer m.deinit();
-
- const obj : json.ObjectMap = m.value;
-
- var result = pb_init(T, allocator);
+ const parsed = try json.parseFromSlice(T, allocator, input, .{});
+ defer parsed.deinit();
- inline for (@typeInfo(T).Struct.fields) |field| {
- if(obj.get(field.name)) |f| {
- const v = @field(T._desc_table, field.name);
- break try decode_json_data(T, v, field, &result, f, allocator);
- }
- } else {
- std.debug.print("Unknown field received in {s} {any}\n", .{ @typeName(T), extracted_data });
- }
-
- return result;
+ return parsed.value;
}
-
/// public decoding function meant to be embedded in message structures
/// Iterates over the input and try to fill the resulting structure accordingly.
pub fn pb_decode(comptime T: type, input: []const u8, allocator: Allocator) !T {
Any help is appreciated.
Hey @librolibro , thanks for your time. I'll look into it ASAP. Any contribution is very much welcome.
UPD: I still have no idea how to solve problems i wrote about earlier, so I switched and almost finished the ManagedString serialization stuff + some tests. It kinda works, but now i have only more questions :)
I added two additional functions to ManagedString struct for std.json to call:
// This one calls when std.json.parseFromSlice will try to deserialize ManagedString struct
pub fn jsonParse(
allocator: Allocator,
source: anytype,
options: json.ParseOptions,
) !@This() {
const string = try json.innerParse([]const u8, allocator, source, options);
// return ManagedString.copy(string, allocator);
return ManagedString.managed(string);
}
// This one calls when std.json.stringify/stringifyAlloc will try to serialize ManagedString struct to string
pub fn jsonStringify(self: *const @This(), jw: anytype) !void {
try jw.write(&self.getSlice());
}
And decode/encode functions:
pub fn json_decode(
comptime T: type,
input: []const u8,
allocator: Allocator,
) !T {
const parsed = try json.parseFromSlice(T, allocator, input, .{});
defer parsed.deinit();
return parsed.value;
}
pub fn json_encode(
data: anytype,
options: json.StringifyOptions,
allocator: Allocator,
) ![]u8 {
return try json.stringifyAlloc(allocator, data, options);
}
Also i wrote some tests:
test "decode_json_with_strings" {
const managed_string = ManagedString{ .Const = "test_string" };
const test_pb_obj = WithStrings{ .name = managed_string };
defer test_pb_obj.deinit();
const test_json_obj = try protobuf.json_decode(
WithStrings,
\\{"name": "test_string"}
,
std.testing.allocator,
);
// NOTE(libro): std.meta.eql sucks here since
// it's comparing strings as pointers
try expect(std.mem.eql(
u8,
test_pb_obj.name.Const,
test_json_obj.name.Const,
));
}
test "encode_json_with_strings" {
const managed_string = ManagedString{ .Const = "test_string" };
const test_pb_obj = WithStrings{ .name = managed_string };
defer test_pb_obj.deinit();
const encoded_string = try protobuf.json_encode(
test_pb_obj,
.{ .whitespace = .indent_2 },
std.testing.allocator,
);
try expect(std.mem.eql(
u8,
encoded_string,
\\{
\\ "name": "test_string"
\\}
,
));
}
Things i wanted to ask about:
- Tests are passing but second test warns about memory leaks (didn't get why)
- In jsonParse function i'm creating const string. IMO returning owned string looks better since string is not a comptime one and i'm already passing the allocator to JSON parser - maybe i'm wrong about it. Also i couldn't compile tests while using owned string (segmentation fault on comparing Owned.str ones with std.mem.eql)
- std.meta.eql doesn't compare strings correctly because it treats them as pointers but not arrays. It isn't much of a problem but code could be more compact and elegant if I could use it for string-contained struct comparison
Looks like there is 3 ways to make it work for ArrayList:
- Somehow extend ArrayList via (pub) usingnamespace. There's little info about it in the internet (here and here), seems like impossible task for a moment. I saw your MessageMixins usage in generated classes but you're passing concrete final struct as an argument and use it as a Self type - and I can't do this for ArrayList because they're already using @this() for Self internally (which is ArrayList itself). Dead end for now.
- Create a wrapper for ArrayList. Easist (and also ugly IMO) way to implement, something like this (you can see the result at godbolt):
const std = @import("std");
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
const json = std.json;
pub fn SerializableArrayList(comptime T: type) type {
return struct {
arr_list: ArrayList(T),
pub fn fromArrayList(arr_list: ArrayList(T)) @This() {
return @This(){.arr_list = arr_list};
}
pub fn getItems(self: @This()) []T {
return self.arr_list.items;
}
pub fn deinit(self: @This()) void {
self.arr_list.deinit();
}
pub fn jsonParse(
self: @This(),
allocator: Allocator,
source: anytype,
options: json.ParseOptions,
) !@This() {
const slice = try json.innerParse([]T, allocator, source, options);
return self.arr_list.fromOwnedSlice(allocator, slice);
}
pub fn jsonStringify(self: *const @This(), jw: anytype) !void {
try jw.write(self.arr_list.items);
}
};
}
const S = struct {
f: SerializableArrayList(u32),
};
pub fn main() !void {
const allocator = std.heap.page_allocator;
var a = SerializableArrayList(u32).fromArrayList(
ArrayList(u32).init(allocator),
);
try a.arr_list.append(167);
try a.arr_list.append(168);
const b = S{ .f = a };
std.debug.print("{any}\n", .{b.f.arr_list.items});
std.debug.print("{s}\n", .{try json.stringifyAlloc(
allocator,
b,
.{.whitespace = .indent_2},
)});
}
Looks ugly and not user-friendly for me because now you always need to get extra field of this dummy wrapper (it only keeps ArrayList and 2 methods for std.json extension) - because of that you'll always need to type struct_var.arr_list.append() which is longer that simple arr_list.append(), or add ALL ArrayList methods to this wrapper (sounds terrible). But it works at least)
- Add jsonParse/jsonStringify methods for every generated struct (or for MessageMixins one). This is also bad because you need to reimplement part or innerParse function because it using non-public functions under the hood and inner implemenation or token parser (source: anytype argument in innerParse) may change at any moment.
What's your opinion on the second option?
P.S. 4th option is to use the arrays ([]T) instead of lists (ArrayList(T))
P.P.S. jsonParse method in example above is bad written one - json.Parsed.deinit frees allocated slice, there should Allocator.dupe call or some sort of ownership interception (if it's possible in Zig standard library)
Hello again, sorry for not looking into it yet, I have limited braincells and time available lately.
This is very interesting, but I'd prefer if we could manage to do something that avoid users having to deal with specific types. ManagedString is already something that annoys me a bit, but it's a necessary evil.
Instead of relying on auto parsing, does the std.json library allows for some kind of token/stream based parsing?
Ideally I'd like that kind of process:
- generate at compile time parsing rules
- start iterating over the json tokens (as key-value pairs)
- check if key exist, if it does check value is OK according to parsing rules. If OK, set value in structure field
I think that's what I tried on a branch somewhere, but it was a bit of time ago already.
Nevertheless, I'm grateful for your efforts, interest, and time investment. I'll try to look, I might have some time Thursday.
Hi, thanks for feedback. It's not an urgent case for me, just wanted to use your lib in side project which i don't have enough will to start right now anyway)
Yes, std.json internally has a token parser, but it's for general purpose. I guess you want to add another auxiliary field in generated classes, something like _desc_table (or maybe even reuse this one, didn't know about details) - didn't think about it, might be a good idea.
I'm still thinking about 4th option - with ArrayList(T) -> []T replacement and some extra methods for ManagedString it will be done (i hope), but of course it limits user flexibility (if someone needs ArrayList's, there will be lots of toOwnedSlice/fromOwnedSlice calls).
pub fn SerializableArrayList(comptime T: type) type { return struct { usingnamespace ArrayList(T);
@librolibro usingnamespace
only adds the methods, not the fields; and the self parameters on would be wrong.
Why do we need ArrayList at all vs just a slice?
Hello @james-callahan,
Thanks for your input. We have been working on the subject (mostly @librolibro to be fair) on #49.
We opted for the third option in @librolibro 's analysis:
Add jsonParse/jsonStringify methods for every generated struct (or for MessageMixins one). This is also bad because you need to reimplement part or innerParse function because it using non-public functions under the hood and inner implemenation or token parser (source: anytype argument in innerParse) may change at any moment.
The answer to your question about ArrayList is probably that i made early in the project to represent repeated fields as ArrayList.
Yes, @james-callahan, I had big hopes on usingnamespace
earlier - I thought it could be used for struct inheritance or something like that. MessageMixins is the great example of its usage for sure, but tbh I can't think of any other useful use cases for it now.
Slice usage was the 4th possible outcome I described above in this thread. I used protobuf earlier mainly with Python which using not built-in lists but wrappers around it and I think it's more flexible. Ofc if there would be slices instead of ArrayLists user could can make ArrayList and pass toOwnedSlice() to struct constructor, and also some parts of code could be smaller and less verbose:
fn initIfThereWouldBeASlices() {
const pb = SomeMessage{.repeated_field = .{1, 2, 3, 4, 5}};
}
fn initInOldWay() {
var field_array_list = ArrayList(there_is_a_child_type).init(also_an_allocator);
try field_array_list.append(1);
try field_array_list.append(2);
try field_array_list.append(3);
try field_array_list.append(4);
try field_array_list.append(5);
const pb = SomeMessage{.repeated_field = field_array_list;
}
Well, it's also a topic for discussion (personally i'm not against current ArrayList usage)
P.S. Another advantage in favor of slices was the fact that I wouldn't need to implement jsonParse/jsonStrinfigy methods for structs since slices are processed correctly in std.json. But we recently found out that specs require to use camelCase name convention for JSON fields and now we need to implement our own jsonStrinfigy for name conversion anyway. Also we need jsonParse to handle both snake_case and camelCase cases while deserializing the JSON string.