Memory leak and non-constant response time
SebastienAndreo opened this issue · 5 comments
The memory consumption and the execution time increase if the builder is created at each iteration
Using the master/latest version
Example Schema and code
from memory_profiler import profile
import python_jsonschema_objects as pjs
import json
import gc
import time
# JSON Schema describing a simple "Person" object with three optional
# properties: firstName, lastName (strings) and age (non-negative integer).
schema_str = """
{
"title": "Person",
"type": "object",
"properties": {
"firstName": {
"type": "string",
"description": "The person's first name."
},
"lastName": {
"type": "string",
"description": "The person's last name."
},
"age": {
"description": "Age in years which must be equal to or greater than zero.",
"type": "integer",
"minimum": 0
}
}
}
"""
# A JSON instance that conforms to the schema above; parsed repeatedly in test().
instance_str = """
{
"firstName": "John",
"lastName": "Doe",
"age": 21
}
"""
# Keeps a reference to every constructed Person so instances are never
# garbage-collected during the measurement runs.
obj_array = []
# Alternative setup: build the classes once (singleton builder), kept here
# commented out for comparison with the per-iteration version inside test().
#builder = pjs.ObjectBuilder(json.loads(schema_str))
#ns = builder.build_classes()
@profile
def test(n):
    """Build the schema classes and instantiate a Person on every iteration.

    This is the scenario under investigation: the ObjectBuilder and the
    generated namespace are re-created n-1 times, and each resulting Person
    is retained in the module-level obj_array.
    """
    start = time.time()
    for _ in range(1, n):
        # Builder created at each iteration — re-parses the schema and
        # regenerates the entire class hierarchy every time.
        iteration_builder = pjs.ObjectBuilder(json.loads(schema_str))
        namespace = iteration_builder.build_classes()
        person = namespace.Person.from_json(instance_str)
        obj_array.append(person)
    print('sec:', time.time()-start)
# Run the measurement ten times, forcing a full garbage collection between
# consecutive runs — same call order as writing the pairs out by hand.
for run in range(10):
    if run:
        gc.collect()
    test(100)
Line # Mem usage Increment Occurences Line Contents
============================================================
43 25.0 MiB 25.0 MiB 1 @profile
44 def test(n):
45 25.0 MiB 0.0 MiB 1 t = time.time()
46 29.7 MiB 0.0 MiB 100 for i in range(1, n):
47 29.7 MiB 1.1 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 29.7 MiB 0.5 MiB 99 ns = builder.build_classes()
49 29.7 MiB 3.1 MiB 99 a = ns.Person.from_json(instance_str)
50 29.7 MiB 0.0 MiB 99 obj_array.append(a)
51 29.7 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 1.3166437149047852
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 29.7 MiB 29.7 MiB 1 @profile
44 def test(n):
45 29.7 MiB 0.0 MiB 1 t = time.time()
46 42.0 MiB 0.0 MiB 100 for i in range(1, n):
47 41.7 MiB 0.8 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 41.7 MiB 0.3 MiB 99 ns = builder.build_classes()
49 42.0 MiB 11.2 MiB 99 a = ns.Person.from_json(instance_str)
50 42.0 MiB 0.0 MiB 99 obj_array.append(a)
51 42.0 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 1.4638645648956299
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 42.0 MiB 42.0 MiB 1 @profile
44 def test(n):
45 42.0 MiB 0.0 MiB 1 t = time.time()
46 59.1 MiB 0.0 MiB 100 for i in range(1, n):
47 58.9 MiB 1.2 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 58.9 MiB 0.8 MiB 99 ns = builder.build_classes()
49 59.1 MiB 15.2 MiB 99 a = ns.Person.from_json(instance_str)
50 59.1 MiB 0.0 MiB 99 obj_array.append(a)
51 59.1 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 1.66902494430542
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 59.1 MiB 59.1 MiB 1 @profile
44 def test(n):
45 59.1 MiB 0.0 MiB 1 t = time.time()
46 81.3 MiB 0.0 MiB 100 for i in range(1, n):
47 81.1 MiB 1.2 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 81.1 MiB 0.0 MiB 99 ns = builder.build_classes()
49 81.3 MiB 21.0 MiB 99 a = ns.Person.from_json(instance_str)
50 81.3 MiB 0.0 MiB 99 obj_array.append(a)
51 81.3 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 1.8282761573791504
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 81.3 MiB 81.3 MiB 1 @profile
44 def test(n):
45 81.3 MiB 0.0 MiB 1 t = time.time()
46 117.2 MiB 0.0 MiB 100 for i in range(1, n):
47 116.7 MiB 1.0 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 116.7 MiB 0.4 MiB 99 ns = builder.build_classes()
49 117.2 MiB 34.5 MiB 99 a = ns.Person.from_json(instance_str)
50 117.2 MiB 0.0 MiB 99 obj_array.append(a)
51 117.2 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 2.0199222564697266
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 117.2 MiB 117.2 MiB 1 @profile
44 def test(n):
45 117.2 MiB 0.0 MiB 1 t = time.time()
46 158.7 MiB 0.0 MiB 100 for i in range(1, n):
47 158.2 MiB 1.8 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 158.2 MiB 0.6 MiB 99 ns = builder.build_classes()
49 158.7 MiB 39.2 MiB 99 a = ns.Person.from_json(instance_str)
50 158.7 MiB 0.0 MiB 99 obj_array.append(a)
51 158.7 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 2.4503257274627686
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 158.7 MiB 158.7 MiB 1 @profile
44 def test(n):
45 158.7 MiB 0.0 MiB 1 t = time.time()
46 205.2 MiB 0.0 MiB 100 for i in range(1, n):
47 205.0 MiB 1.9 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 205.0 MiB 0.0 MiB 99 ns = builder.build_classes()
49 205.2 MiB 44.6 MiB 99 a = ns.Person.from_json(instance_str)
50 205.2 MiB 0.0 MiB 99 obj_array.append(a)
51 205.2 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 2.3949639797210693
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 205.2 MiB 205.2 MiB 1 @profile
44 def test(n):
45 205.2 MiB 0.0 MiB 1 t = time.time()
46 257.1 MiB 0.0 MiB 100 for i in range(1, n):
47 256.6 MiB 1.8 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 256.6 MiB 0.4 MiB 99 ns = builder.build_classes()
49 257.1 MiB 49.7 MiB 99 a = ns.Person.from_json(instance_str)
50 257.1 MiB 0.0 MiB 99 obj_array.append(a)
51 257.1 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 2.855459213256836
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 257.1 MiB 257.1 MiB 1 @profile
44 def test(n):
45 257.1 MiB 0.0 MiB 1 t = time.time()
46 313.9 MiB 0.0 MiB 100 for i in range(1, n):
47 313.1 MiB 1.0 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 313.1 MiB 0.0 MiB 99 ns = builder.build_classes()
49 313.9 MiB 55.7 MiB 99 a = ns.Person.from_json(instance_str)
50 313.9 MiB 0.1 MiB 99 obj_array.append(a)
51 313.9 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 2.741384506225586
Filename: /home/seb/memory.py
Line # Mem usage Increment Occurences Line Contents
============================================================
43 313.9 MiB 313.9 MiB 1 @profile
44 def test(n):
45 313.9 MiB 0.0 MiB 1 t = time.time()
46 375.9 MiB 0.0 MiB 100 for i in range(1, n):
47 375.3 MiB 1.4 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 375.3 MiB 1.2 MiB 99 ns = builder.build_classes()
49 375.9 MiB 59.3 MiB 99 a = ns.Person.from_json(instance_str)
50 375.9 MiB 0.0 MiB 99 obj_array.append(a)
51 375.9 MiB 0.0 MiB 1 print('sec:', time.time()-t)
Expected behavior
The memory cost of instantiating a Person object should stay constant, and so should the execution time.
I don't agree. You're creating a whole new object builder from the schema every time, and not necessarily cleaning up the old one. That means it's re-parsing the JSON and recreating the entire class type hierarchy each time.
Why wouldn't this increase memory consumption? There's no way for the code to know you're loading the same schema over and over again.
Do you have a valid use case for doing this?
That the memory increases globally is not the real problem and you are right for that point. My observation is more: the memory instantiation cost of a new person: 'a = ns.Person.from_json(instance_str)' is not constant. I reran the test with 18 iterations
and as you can see the memory increments (2.7, 11.5, 15.2, 20.1, 34.8, 39.2, 45.2, 49.5, 54.6, 60.1, 64.6, 69.1, 74.3, 79.7, 84.1, 90.2, 112.1, 136.4) follow a linear trend, which implies quadratic growth for the system as a whole.
So the question is: why does instantiating a new object from JSON cost more and more memory?
@cwacek can you comment? You also mentioned, "not necessarily cleaning up the old one" I haven't seen cleanup methods, how can I do that ?
TLDR: I looked into this further, and I'm convinced this is a non-issue. If you take the build_classes()
call out of the loop, there is nearly zero increase in memory usage. I'm closing this issue unless you can tell me a reasonable use case for building classes in each loop iteration
Details
Here's what it looks like if you build the classes outside of the loop (and thus get to re-use the constructed classes):
@profile
def test(n):
    """Build the schema classes once, then instantiate a Person per iteration.

    The ObjectBuilder and the generated namespace are hoisted out of the
    loop, so the constructed class hierarchy is reused; each Person is
    retained in the module-level obj_array.
    """
    start = time.time()
    builder = pjs.ObjectBuilder(json.loads(schema_str))
    ns = builder.build_classes()
    for _ in range(1, n):
        obj_array.append(ns.Person.from_json(instance_str))
    print('sec:', time.time()-start)
Result:
sec: 0.14585113525390625
Filename: test/test_210_memoryuse.py
Line # Mem usage Increment Occurences Line Contents
============================================================
44 27.3 MiB 27.3 MiB 1 @profile
45 def test(n):
46 27.3 MiB 0.0 MiB 1 t = time.time()
47 27.3 MiB 0.0 MiB 1 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 27.3 MiB 0.0 MiB 1 ns = builder.build_classes()
49 27.4 MiB 0.0 MiB 100 for i in range(1, n):
50 # Builder created at each iteration
51 #builder = pjs.ObjectBuilder(json.loads(schema_str))
52 #ns = builder.build_classes()
53 27.4 MiB 0.1 MiB 99 a = ns.Person.from_json(instance_str)
54 27.4 MiB 0.0 MiB 99 obj_array.append(a)
55 27.4 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 0.13786005973815918
Filename: test/test_210_memoryuse.py
Line # Mem usage Increment Occurences Line Contents
============================================================
44 27.4 MiB 27.4 MiB 1 @profile
45 def test(n):
46 27.4 MiB 0.0 MiB 1 t = time.time()
47 27.5 MiB 0.0 MiB 1 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 27.5 MiB 0.0 MiB 1 ns = builder.build_classes()
49 27.6 MiB 0.0 MiB 100 for i in range(1, n):
50 # Builder created at each iteration
51 #builder = pjs.ObjectBuilder(json.loads(schema_str))
52 #ns = builder.build_classes()
53 27.6 MiB 0.1 MiB 99 a = ns.Person.from_json(instance_str)
54 27.6 MiB 0.0 MiB 99 obj_array.append(a)
55 27.6 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 0.13791322708129883
Filename: test/test_210_memoryuse.py
Line # Mem usage Increment Occurences Line Contents
============================================================
44 27.6 MiB 27.6 MiB 1 @profile
45 def test(n):
46 27.6 MiB 0.0 MiB 1 t = time.time()
47 27.6 MiB 0.0 MiB 1 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 27.6 MiB 0.0 MiB 1 ns = builder.build_classes()
49 27.7 MiB 0.0 MiB 100 for i in range(1, n):
50 # Builder created at each iteration
51 #builder = pjs.ObjectBuilder(json.loads(schema_str))
52 #ns = builder.build_classes()
53 27.7 MiB 0.1 MiB 99 a = ns.Person.from_json(instance_str)
54 27.7 MiB 0.0 MiB 99 obj_array.append(a)
55 27.7 MiB 0.0 MiB 1 print('sec:', time.time()-t)
sec: 0.1420879364013672
Filename: test/test_210_memoryuse.py
Line # Mem usage Increment Occurences Line Contents
============================================================
44 27.7 MiB 27.7 MiB 1 @profile
45 def test(n):
46 27.7 MiB 0.0 MiB 1 t = time.time()
47 27.8 MiB 0.0 MiB 1 builder = pjs.ObjectBuilder(json.loads(schema_str))
48 27.8 MiB 0.0 MiB 1 ns = builder.build_classes()
49 27.9 MiB 0.0 MiB 100 for i in range(1, n):
50 # Builder created at each iteration
51 #builder = pjs.ObjectBuilder(json.loads(schema_str))
52 #ns = builder.build_classes()
53 27.9 MiB 0.1 MiB 99 a = ns.Person.from_json(instance_str)
54 27.9 MiB 0.0 MiB 99 obj_array.append(a)
55 27.9 MiB 0.0 MiB 1 print('sec:', time.time()-t)
This tells me, as I suspected, that it's building the classes that incurs the memory overhead, and by extension, that memory_profiler is slightly off when it attributes the usage by line. This wouldn't be surprising if we consider that build_classes() constructs a bunch of types but doesn't actually instantiate them — there might be extra bookkeeping and overhead when a type is first instantiated.
To validate this I adjusted the function to call build_classes
, then instantiate two objects. As I suspected, only the first object creation uses any appreciable memory.
Line # Mem usage Increment Occurences Line Contents
============================================================
44 61.8 MiB 61.8 MiB 1 @profile
45 def test(n):
46 61.8 MiB 0.0 MiB 1 t = time.time()
47 #builder = pjs.ObjectBuilder(json.loads(schema_str))
48 #ns = builder.build_classes()
49 84.6 MiB 0.0 MiB 100 for i in range(1, n):
50 # Builder created at each iteration
51 84.3 MiB 1.4 MiB 99 builder = pjs.ObjectBuilder(json.loads(schema_str))
52 84.3 MiB 0.4 MiB 99 ns = builder.build_classes()
53 84.6 MiB 20.9 MiB 99 a = ns.Person.from_json(instance_str)
54 84.6 MiB 0.0 MiB 99 obj_array.append(a)
55 84.6 MiB 0.0 MiB 99 b = ns.Person.from_json(instance_str)
56 84.6 MiB 0.0 MiB 99 obj_array.append(b)
57 84.6 MiB 0.0 MiB 1 print('sec:', time.time()-t)